diff --git a/.gitattributes b/.gitattributes index 9317e778def9dc143a98020f8f5bbb59ececc30c..1b7598a4ce0cbeca7db07084b3981199df74a770 100644 --- a/.gitattributes +++ b/.gitattributes @@ -57,3 +57,57 @@ barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer.json filter=lfs barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_config.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5acf1cc6d54382f85d96c9747061de68fdf82412 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "k_proj", + "down_proj", + "q_proj", + "o_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_model.safetensors b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffcb9e2266f67afad51b9ae4831edbd30dd52010 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7755f47750ba602d4c0d1bd4d6eab9c703219eafa33741dd03c79b6c0f9d51b9 +size 335605144 diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/all_results.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4891532bcb25bb33df70285b28e73881f86722f0 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 3.9356831750946816e+17, + "train_loss": 3.96218955618414, + "train_runtime": 277.7455, + "train_samples": 2802, + "train_samples_per_second": 10.088, + "train_steps_per_second": 0.634 +} \ No newline at end of file diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/chat_template.jinja b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/config.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/generation_config.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..86546ab87e4eb79c96f00ffe17ad89ca00e2ecc1 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.52.4" +} diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/special_tokens_map.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer_config.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/train_results.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4891532bcb25bb33df70285b28e73881f86722f0 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 3.9356831750946816e+17, + "train_loss": 3.96218955618414, + "train_runtime": 277.7455, + "train_samples": 2802, + "train_samples_per_second": 10.088, + "train_steps_per_second": 0.634 +} \ No newline at end of file diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/trainer_state.json b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7dd48451f490ee97a0e5db6f1712b252cc1b7be6 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/trainer_state.json @@ -0,0 +1,288 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 176, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.028530670470756064, + "grad_norm": 98.07823944091797, + "learning_rate": 6.666666666666667e-06, + "loss": 11.8227, + "step": 5 + }, + { + "epoch": 0.05706134094151213, + "grad_norm": 48.8634033203125, + "learning_rate": 1.5e-05, + "loss": 10.705, + "step": 10 + }, + { + "epoch": 0.08559201141226819, + "grad_norm": 22.390178680419922, + "learning_rate": 2.3333333333333336e-05, + "loss": 7.2769, + "step": 15 + }, + { + "epoch": 0.11412268188302425, + "grad_norm": 11.743159294128418, + "learning_rate": 2.9997034946550984e-05, + "loss": 5.1852, + "step": 20 + }, + { + "epoch": 0.14265335235378032, + "grad_norm": 11.406346321105957, + "learning_rate": 2.98933811055296e-05, + "loss": 4.8695, + "step": 25 + }, + { + "epoch": 0.17118402282453637, + "grad_norm": 9.969062805175781, + "learning_rate": 2.9642644716285765e-05, + "loss": 4.1906, + "step": 30 + }, + { + "epoch": 0.19971469329529243, + "grad_norm": 6.9679951667785645, + "learning_rate": 2.924730197591674e-05, + "loss": 3.4173, + "step": 35 + }, + { + "epoch": 0.2282453637660485, + "grad_norm": 13.462809562683105, + "learning_rate": 2.871125717031052e-05, + "loss": 4.063, + "step": 40 + }, + { + "epoch": 0.25677603423680456, + "grad_norm": 11.700884819030762, + "learning_rate": 2.8039804116593743e-05, + "loss": 3.9671, + "step": 45 + }, + { + "epoch": 0.28530670470756064, + "grad_norm": 9.339689254760742, + "learning_rate": 2.7239573882987418e-05, + "loss": 3.4171, + "step": 50 + }, + { + "epoch": 0.31383737517831667, + "grad_norm": 7.5099921226501465, + "learning_rate": 2.6318469302373453e-05, + "loss": 3.5813, + "step": 55 + }, + { + "epoch": 0.34236804564907275, + "grad_norm": 6.536746025085449, + "learning_rate": 2.52855869262962e-05, + "loss": 3.5958, + "step": 60 + }, + { + "epoch": 0.37089871611982883, + "grad_norm": 8.016010284423828, + "learning_rate": 2.4151127190157864e-05, + "loss": 3.6182, + "step": 65 + }, + { + "epoch": 0.39942938659058486, + "grad_norm": 6.370848178863525, + "learning_rate": 2.2926293676789295e-05, + "loss": 3.6103, + "step": 70 + }, + { + "epoch": 0.42796005706134094, + "grad_norm": 8.359729766845703, + "learning_rate": 2.162318247323868e-05, + "loss": 3.5373, + "step": 75 + }, + { + "epoch": 0.456490727532097, + "grad_norm": 6.346542835235596, + "learning_rate": 2.0254662713457366e-05, + "loss": 3.3341, + "step": 80 + }, + { + "epoch": 0.48502139800285304, + "grad_norm": 13.426467895507812, + "learning_rate": 1.883424948660712e-05, + "loss": 3.3211, + "step": 85 + }, + { + "epoch": 0.5135520684736091, + "grad_norm": 7.283843994140625, + "learning_rate": 1.7375970366108225e-05, + "loss": 3.2597, + "step": 90 + }, + { + "epoch": 0.5420827389443652, + "grad_norm": 9.1486234664917, + "learning_rate": 1.5894226877547298e-05, + "loss": 3.4157, + "step": 95 + }, + { + "epoch": 0.5706134094151213, + "grad_norm": 8.91500186920166, + "learning_rate": 1.4403652273546118e-05, + "loss": 3.1861, + "step": 100 + }, + { + "epoch": 0.5991440798858774, + "grad_norm": 8.01885986328125, + "learning_rate": 1.2918967020163978e-05, + "loss": 3.2207, + "step": 105 + }, + { + "epoch": 0.6276747503566333, + "grad_norm": 13.87535285949707, + "learning_rate": 1.1454833422006428e-05, + "loss": 3.2422, + "step": 110 + }, + { + "epoch": 0.6562054208273894, + "grad_norm": 6.999363422393799, + "learning_rate": 1.0025710821718983e-05, + "loss": 3.2623, + "step": 115 + }, + { + "epoch": 0.6847360912981455, + "grad_norm": 12.511735916137695, + "learning_rate": 8.645712803872084e-06, + "loss": 3.4606, + "step": 120 + }, + { + "epoch": 0.7132667617689016, + "grad_norm": 7.841530799865723, + "learning_rate": 7.3284678134486685e-06, + "loss": 2.9957, + "step": 125 + }, + { + "epoch": 0.7417974322396577, + "grad_norm": 7.247511863708496, + "learning_rate": 6.086984565424345e-06, + "loss": 2.9632, + "step": 130 + }, + { + "epoch": 0.7703281027104137, + "grad_norm": 6.871656894683838, + "learning_rate": 4.933523574614447e-06, + "loss": 3.1, + "step": 135 + }, + { + "epoch": 0.7988587731811697, + "grad_norm": 9.417755126953125, + "learning_rate": 3.879476074520731e-06, + "loss": 2.9654, + "step": 140 + }, + { + "epoch": 0.8273894436519258, + "grad_norm": 6.606371879577637, + "learning_rate": 2.935251520938528e-06, + "loss": 2.7702, + "step": 145 + }, + { + "epoch": 0.8559201141226819, + "grad_norm": 10.26504898071289, + "learning_rate": 2.1101747913050855e-06, + "loss": 2.9336, + "step": 150 + }, + { + "epoch": 0.884450784593438, + "grad_norm": 10.80896282196045, + "learning_rate": 1.412394095017151e-06, + "loss": 3.0025, + "step": 155 + }, + { + "epoch": 0.912981455064194, + "grad_norm": 8.550090789794922, + "learning_rate": 8.488005041679842e-07, + "loss": 2.9469, + "step": 160 + }, + { + "epoch": 0.9415121255349501, + "grad_norm": 7.228885173797607, + "learning_rate": 4.2495989939384916e-07, + "loss": 3.1122, + "step": 165 + }, + { + "epoch": 0.9700427960057061, + "grad_norm": 7.973071575164795, + "learning_rate": 1.4505800291247207e-07, + "loss": 2.9233, + "step": 170 + }, + { + "epoch": 0.9985734664764622, + "grad_norm": 7.4301252365112305, + "learning_rate": 1.1859041590472352e-08, + "loss": 3.0535, + "step": 175 + }, + { + "epoch": 1.0, + "step": 176, + "total_flos": 3.9356831750946816e+17, + "train_loss": 3.96218955618414, + "train_runtime": 277.7455, + "train_samples_per_second": 10.088, + "train_steps_per_second": 0.634 + } + ], + "logging_steps": 5, + "max_steps": 176, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.9356831750946816e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/training_args.bin b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba742d0237f12d961e1ac26802664452b034bc27 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6862db60ffd6c2d4a1dce55316821419a6e148ca0aecc445e6cdfd3caffc17fe +size 8081 diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_config.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2e7b275f35d1414e43af4e5007687f0d3ff9e2f8 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "k_proj", + "v_proj", + "o_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_model.safetensors b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5fc86f61e23d65beae7fcfd1868870c6b114cba2 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1bac12a9912ed22832f19e576e20188109ebeffec16a0490193d4d1d0fecb43 +size 335605144 diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/all_results.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9b12a911f331cfd6c805ea166f764f49804b64a9 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 3.941173665328005e+17, + "train_loss": 1.885472126643766, + "train_runtime": 151.7112, + "train_samples": 2802, + "train_samples_per_second": 18.469, + "train_steps_per_second": 1.16 +} \ No newline at end of file diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/chat_template.jinja b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/config.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/generation_config.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..86546ab87e4eb79c96f00ffe17ad89ca00e2ecc1 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.52.4" +} diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/special_tokens_map.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer_config.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/train_results.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9b12a911f331cfd6c805ea166f764f49804b64a9 --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 3.941173665328005e+17, + "train_loss": 1.885472126643766, + "train_runtime": 151.7112, + "train_samples": 2802, + "train_samples_per_second": 18.469, + "train_steps_per_second": 1.16 +} \ No newline at end of file diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/trainer_state.json b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d161c3aac3784bce575de3d3a06720062372674d --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/trainer_state.json @@ -0,0 +1,288 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 176, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02849002849002849, + "grad_norm": 41.54158401489258, + "learning_rate": 6.666666666666667e-06, + "loss": 4.6986, + "step": 5 + }, + { + "epoch": 0.05698005698005698, + "grad_norm": 22.764928817749023, + "learning_rate": 1.5e-05, + "loss": 4.3109, + "step": 10 + }, + { + "epoch": 0.08547008547008547, + "grad_norm": 10.573023796081543, + "learning_rate": 2.3333333333333336e-05, + "loss": 3.3624, + "step": 15 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 6.423072338104248, + "learning_rate": 2.9997034946550984e-05, + "loss": 2.5337, + "step": 20 + }, + { + "epoch": 0.14245014245014245, + "grad_norm": 6.361787796020508, + "learning_rate": 2.98933811055296e-05, + "loss": 2.4237, + "step": 25 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 4.749721527099609, + "learning_rate": 2.9642644716285765e-05, + "loss": 2.0534, + "step": 30 + }, + { + "epoch": 0.19943019943019943, + "grad_norm": 3.451364517211914, + "learning_rate": 2.924730197591674e-05, + "loss": 1.7088, + "step": 35 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 8.959653854370117, + "learning_rate": 2.871125717031052e-05, + "loss": 2.0079, + "step": 40 + }, + { + "epoch": 0.2564102564102564, + "grad_norm": 7.419877052307129, + "learning_rate": 2.8039804116593743e-05, + "loss": 1.9297, + "step": 45 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 4.485551357269287, + "learning_rate": 2.7239573882987418e-05, + "loss": 1.7006, + "step": 50 + }, + { + "epoch": 0.31339031339031337, + "grad_norm": 4.072010517120361, + "learning_rate": 2.6318469302373453e-05, + "loss": 1.7362, + "step": 55 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 3.5165116786956787, + "learning_rate": 2.52855869262962e-05, + "loss": 1.791, + "step": 60 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 3.970125198364258, + "learning_rate": 2.4151127190157864e-05, + "loss": 1.8049, + "step": 65 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 3.4378178119659424, + "learning_rate": 2.2926293676789295e-05, + "loss": 1.7725, + "step": 70 + }, + { + "epoch": 0.42735042735042733, + "grad_norm": 4.609078407287598, + "learning_rate": 2.162318247323868e-05, + "loss": 1.7165, + "step": 75 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 3.251359224319458, + "learning_rate": 2.0254662713457366e-05, + "loss": 1.6442, + "step": 80 + }, + { + "epoch": 0.4843304843304843, + "grad_norm": 5.737546443939209, + "learning_rate": 1.883424948660712e-05, + "loss": 1.6232, + "step": 85 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 4.057260036468506, + "learning_rate": 1.7375970366108225e-05, + "loss": 1.5908, + "step": 90 + }, + { + "epoch": 0.5413105413105413, + "grad_norm": 4.2831315994262695, + "learning_rate": 1.5894226877547298e-05, + "loss": 1.7128, + "step": 95 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 3.8855299949645996, + "learning_rate": 1.4403652273546118e-05, + "loss": 1.5593, + "step": 100 + }, + { + "epoch": 0.5982905982905983, + "grad_norm": 3.7846293449401855, + "learning_rate": 1.2918967020163978e-05, + "loss": 1.5821, + "step": 105 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 6.312170505523682, + "learning_rate": 1.1454833422006428e-05, + "loss": 1.5836, + "step": 110 + }, + { + "epoch": 0.6552706552706553, + "grad_norm": 3.63012433052063, + "learning_rate": 1.0025710821718983e-05, + "loss": 1.584, + "step": 115 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 5.612448692321777, + "learning_rate": 8.645712803872084e-06, + "loss": 1.7054, + "step": 120 + }, + { + "epoch": 0.7122507122507122, + "grad_norm": 3.9694271087646484, + "learning_rate": 7.3284678134486685e-06, + "loss": 1.4795, + "step": 125 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 3.8035147190093994, + "learning_rate": 6.086984565424345e-06, + "loss": 1.4751, + "step": 130 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 3.422909736633301, + "learning_rate": 4.933523574614447e-06, + "loss": 1.5283, + "step": 135 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 4.461482524871826, + "learning_rate": 3.879476074520731e-06, + "loss": 1.4482, + "step": 140 + }, + { + "epoch": 0.8262108262108262, + "grad_norm": 3.229851722717285, + "learning_rate": 2.935251520938528e-06, + "loss": 1.3772, + "step": 145 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 4.533471584320068, + "learning_rate": 2.1101747913050855e-06, + "loss": 1.4373, + "step": 150 + }, + { + "epoch": 0.8831908831908832, + "grad_norm": 4.994824409484863, + "learning_rate": 1.412394095017151e-06, + "loss": 1.4558, + "step": 155 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 3.992816925048828, + "learning_rate": 8.488005041679842e-07, + "loss": 1.4463, + "step": 160 + }, + { + "epoch": 0.9401709401709402, + "grad_norm": 3.7581281661987305, + "learning_rate": 4.2495989939384916e-07, + "loss": 1.5126, + "step": 165 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 3.792056083679199, + "learning_rate": 1.4505800291247207e-07, + "loss": 1.4521, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "grad_norm": 4.3386077880859375, + "learning_rate": 1.1859041590472352e-08, + "loss": 1.4799, + "step": 175 + }, + { + "epoch": 1.0, + "step": 176, + "total_flos": 3.941173665328005e+17, + "train_loss": 1.885472126643766, + "train_runtime": 151.7112, + "train_samples_per_second": 18.469, + "train_steps_per_second": 1.16 + } + ], + "logging_steps": 5, + "max_steps": 176, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.941173665328005e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/training_args.bin b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe23b5b9b1debc8390404580938ec8a5505b084e --- /dev/null +++ b/hotpotqa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e6575c7eaad6eeba26b1b2ae8bdc165ab078951c886c5486c4becfad525d8e +size 8081 diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..859b96ddbabc5f119f5e1bd52e2d7b6072fbacc8 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a344ed019f697b07db85d02164cd9366ae88335d --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "gate_proj", + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25844a3d0be2daeb96ab7f8e1a62582cc5d057b4 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0fa3bcf3a2a32b41ca909dabbb5384252c18ec8b72131d784cda5fceaf33253 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5ac18f17970f8e6c41fb8382fc953f40be99c00d --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8970279091854377e+18, + "train_loss": 0.46635930327050884, + "train_runtime": 1484.7838, + "train_samples": 14795, + "train_samples_per_second": 29.893, + "train_steps_per_second": 0.935 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5ac18f17970f8e6c41fb8382fc953f40be99c00d --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8970279091854377e+18, + "train_loss": 0.46635930327050884, + "train_runtime": 1484.7838, + "train_samples": 14795, + "train_samples_per_second": 29.893, + "train_steps_per_second": 0.935 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..458968e07f9725cc61d30fa07ed8bfb3962896d1 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1982 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1389, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010810810810810811, + "grad_norm": 0.6749756932258606, + "learning_rate": 1.7142857142857143e-06, + "loss": 1.6002, + "step": 5 + }, + { + "epoch": 0.021621621621621623, + "grad_norm": 0.6739484667778015, + "learning_rate": 3.857142857142857e-06, + "loss": 1.6545, + "step": 10 + }, + { + "epoch": 0.032432432432432434, + "grad_norm": 0.5106654167175293, + "learning_rate": 6e-06, + "loss": 1.5524, + "step": 15 + }, + { + "epoch": 0.043243243243243246, + "grad_norm": 0.5350106954574585, + "learning_rate": 8.142857142857142e-06, + "loss": 1.627, + "step": 20 + }, + { + "epoch": 0.05405405405405406, + "grad_norm": 0.4918515980243683, + "learning_rate": 1.0285714285714286e-05, + "loss": 1.5917, + "step": 25 + }, + { + "epoch": 0.06486486486486487, + "grad_norm": 0.5422373414039612, + "learning_rate": 1.242857142857143e-05, + "loss": 1.5633, + "step": 30 + }, + { + "epoch": 0.07567567567567568, + "grad_norm": 0.5008731484413147, + "learning_rate": 1.4571428571428571e-05, + "loss": 1.5382, + "step": 35 + }, + { + "epoch": 0.08648648648648649, + "grad_norm": 0.452938973903656, + "learning_rate": 1.6714285714285716e-05, + "loss": 1.5515, + "step": 40 + }, + { + "epoch": 0.0972972972972973, + "grad_norm": 0.49676385521888733, + "learning_rate": 1.8857142857142856e-05, + "loss": 1.5271, + "step": 45 + }, + { + "epoch": 0.10810810810810811, + "grad_norm": 0.5166845321655273, + "learning_rate": 2.1e-05, + "loss": 1.5582, + "step": 50 + }, + { + "epoch": 0.11891891891891893, + "grad_norm": 0.4737708270549774, + "learning_rate": 2.3142857142857145e-05, + "loss": 1.454, + "step": 55 + }, + { + "epoch": 0.12972972972972974, + "grad_norm": 0.5474684834480286, + "learning_rate": 2.5285714285714285e-05, + "loss": 1.418, + "step": 60 + }, + { + "epoch": 0.14054054054054055, + "grad_norm": 0.5287275314331055, + "learning_rate": 2.7428571428571428e-05, + "loss": 1.4695, + "step": 65 + }, + { + "epoch": 0.15135135135135136, + "grad_norm": 0.6109984517097473, + "learning_rate": 2.9571428571428575e-05, + "loss": 1.431, + "step": 70 + }, + { + "epoch": 0.16216216216216217, + "grad_norm": 0.6882091164588928, + "learning_rate": 2.999931924927058e-05, + "loss": 1.3871, + "step": 75 + }, + { + "epoch": 0.17297297297297298, + "grad_norm": 0.6718704104423523, + "learning_rate": 2.999655380533103e-05, + "loss": 1.4817, + "step": 80 + }, + { + "epoch": 0.1837837837837838, + "grad_norm": 0.6657449007034302, + "learning_rate": 2.999166151316113e-05, + "loss": 1.4664, + "step": 85 + }, + { + "epoch": 0.1945945945945946, + "grad_norm": 0.6535046100616455, + "learning_rate": 2.9984643066597815e-05, + "loss": 1.3768, + "step": 90 + }, + { + "epoch": 0.20540540540540542, + "grad_norm": 0.6701520085334778, + "learning_rate": 2.997549946101448e-05, + "loss": 1.2797, + "step": 95 + }, + { + "epoch": 0.21621621621621623, + "grad_norm": 0.6286271214485168, + "learning_rate": 2.9964231993179804e-05, + "loss": 1.315, + "step": 100 + }, + { + "epoch": 0.22702702702702704, + "grad_norm": 0.6639678478240967, + "learning_rate": 2.9950842261073867e-05, + "loss": 1.3076, + "step": 105 + }, + { + "epoch": 0.23783783783783785, + "grad_norm": 0.6478069424629211, + "learning_rate": 2.9935332163661465e-05, + "loss": 1.2784, + "step": 110 + }, + { + "epoch": 0.24864864864864866, + "grad_norm": 0.653148353099823, + "learning_rate": 2.9917703900622867e-05, + "loss": 1.3221, + "step": 115 + }, + { + "epoch": 0.2594594594594595, + "grad_norm": 0.6923774480819702, + "learning_rate": 2.9897959972041778e-05, + "loss": 1.3407, + "step": 120 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.7054397463798523, + "learning_rate": 2.9876103178050833e-05, + "loss": 1.2499, + "step": 125 + }, + { + "epoch": 0.2810810810810811, + "grad_norm": 0.7252037525177002, + "learning_rate": 2.9852136618434435e-05, + "loss": 1.1914, + "step": 130 + }, + { + "epoch": 0.2918918918918919, + "grad_norm": 0.8995503187179565, + "learning_rate": 2.9826063692189152e-05, + "loss": 1.2278, + "step": 135 + }, + { + "epoch": 0.3027027027027027, + "grad_norm": 0.8531567454338074, + "learning_rate": 2.9797888097041656e-05, + "loss": 1.1881, + "step": 140 + }, + { + "epoch": 0.31351351351351353, + "grad_norm": 0.8225644826889038, + "learning_rate": 2.9767613828924316e-05, + "loss": 1.1099, + "step": 145 + }, + { + "epoch": 0.32432432432432434, + "grad_norm": 0.7384702563285828, + "learning_rate": 2.9735245181408466e-05, + "loss": 1.124, + "step": 150 + }, + { + "epoch": 0.33513513513513515, + "grad_norm": 0.7921214699745178, + "learning_rate": 2.9700786745095483e-05, + "loss": 1.1146, + "step": 155 + }, + { + "epoch": 0.34594594594594597, + "grad_norm": 0.8876392841339111, + "learning_rate": 2.966424340696577e-05, + "loss": 1.0549, + "step": 160 + }, + { + "epoch": 0.3567567567567568, + "grad_norm": 0.8177557587623596, + "learning_rate": 2.9625620349685617e-05, + "loss": 1.1194, + "step": 165 + }, + { + "epoch": 0.3675675675675676, + "grad_norm": 0.9204846620559692, + "learning_rate": 2.9584923050872218e-05, + "loss": 1.147, + "step": 170 + }, + { + "epoch": 0.3783783783783784, + "grad_norm": 0.962609052658081, + "learning_rate": 2.9542157282316822e-05, + "loss": 1.0603, + "step": 175 + }, + { + "epoch": 0.3891891891891892, + "grad_norm": 0.9691535234451294, + "learning_rate": 2.9497329109166154e-05, + "loss": 1.0567, + "step": 180 + }, + { + "epoch": 0.4, + "grad_norm": 0.9528350234031677, + "learning_rate": 2.9450444889062243e-05, + "loss": 1.119, + "step": 185 + }, + { + "epoch": 0.41081081081081083, + "grad_norm": 1.0783238410949707, + "learning_rate": 2.940151127124076e-05, + "loss": 1.0226, + "step": 190 + }, + { + "epoch": 0.42162162162162165, + "grad_norm": 0.9221756458282471, + "learning_rate": 2.935053519558802e-05, + "loss": 1.0075, + "step": 195 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 0.9663214683532715, + "learning_rate": 2.929752389165673e-05, + "loss": 1.0586, + "step": 200 + }, + { + "epoch": 0.44324324324324327, + "grad_norm": 1.1029077768325806, + "learning_rate": 2.9242484877640706e-05, + "loss": 0.969, + "step": 205 + }, + { + "epoch": 0.4540540540540541, + "grad_norm": 0.9367722272872925, + "learning_rate": 2.9185425959308596e-05, + "loss": 0.9634, + "step": 210 + }, + { + "epoch": 0.4648648648648649, + "grad_norm": 1.0548895597457886, + "learning_rate": 2.912635522889686e-05, + "loss": 1.0103, + "step": 215 + }, + { + "epoch": 0.4756756756756757, + "grad_norm": 1.347251534461975, + "learning_rate": 2.9065281063962106e-05, + "loss": 0.94, + "step": 220 + }, + { + "epoch": 0.4864864864864865, + "grad_norm": 1.0111850500106812, + "learning_rate": 2.900221212619297e-05, + "loss": 0.9342, + "step": 225 + }, + { + "epoch": 0.4972972972972973, + "grad_norm": 0.9622715711593628, + "learning_rate": 2.893715736018168e-05, + "loss": 1.0053, + "step": 230 + }, + { + "epoch": 0.5081081081081081, + "grad_norm": 1.0898102521896362, + "learning_rate": 2.8870125992155527e-05, + "loss": 0.9361, + "step": 235 + }, + { + "epoch": 0.518918918918919, + "grad_norm": 1.1396633386611938, + "learning_rate": 2.8801127528668372e-05, + "loss": 0.921, + "step": 240 + }, + { + "epoch": 0.5297297297297298, + "grad_norm": 0.9959793090820312, + "learning_rate": 2.87301717552524e-05, + "loss": 0.8875, + "step": 245 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 1.1547890901565552, + "learning_rate": 2.8657268735030316e-05, + "loss": 0.8682, + "step": 250 + }, + { + "epoch": 0.5513513513513514, + "grad_norm": 1.0546422004699707, + "learning_rate": 2.8582428807288182e-05, + "loss": 0.8818, + "step": 255 + }, + { + "epoch": 0.5621621621621622, + "grad_norm": 1.1594375371932983, + "learning_rate": 2.850566258600903e-05, + "loss": 0.8826, + "step": 260 + }, + { + "epoch": 0.572972972972973, + "grad_norm": 1.2605596780776978, + "learning_rate": 2.8426980958367618e-05, + "loss": 0.8348, + "step": 265 + }, + { + "epoch": 0.5837837837837838, + "grad_norm": 1.194010853767395, + "learning_rate": 2.8346395083186336e-05, + "loss": 0.8428, + "step": 270 + }, + { + "epoch": 0.5945945945945946, + "grad_norm": 1.0932097434997559, + "learning_rate": 2.8263916389352673e-05, + "loss": 0.7917, + "step": 275 + }, + { + "epoch": 0.6054054054054054, + "grad_norm": 1.2288398742675781, + "learning_rate": 2.8179556574198312e-05, + "loss": 0.8138, + "step": 280 + }, + { + "epoch": 0.6162162162162163, + "grad_norm": 1.1970932483673096, + "learning_rate": 2.80933276018402e-05, + "loss": 0.8412, + "step": 285 + }, + { + "epoch": 0.6270270270270271, + "grad_norm": 1.0771979093551636, + "learning_rate": 2.800524170148377e-05, + "loss": 0.7551, + "step": 290 + }, + { + "epoch": 0.6378378378378379, + "grad_norm": 1.191752552986145, + "learning_rate": 2.7915311365688562e-05, + "loss": 0.7675, + "step": 295 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 1.2224516868591309, + "learning_rate": 2.78235493485965e-05, + "loss": 0.7927, + "step": 300 + }, + { + "epoch": 0.6594594594594595, + "grad_norm": 1.2681128978729248, + "learning_rate": 2.7729968664123056e-05, + "loss": 0.765, + "step": 305 + }, + { + "epoch": 0.6702702702702703, + "grad_norm": 1.0764217376708984, + "learning_rate": 2.7634582584111605e-05, + "loss": 0.7519, + "step": 310 + }, + { + "epoch": 0.6810810810810811, + "grad_norm": 1.1778703927993774, + "learning_rate": 2.753740463645115e-05, + "loss": 0.7052, + "step": 315 + }, + { + "epoch": 0.6918918918918919, + "grad_norm": 1.1717065572738647, + "learning_rate": 2.7438448603157802e-05, + "loss": 0.7107, + "step": 320 + }, + { + "epoch": 0.7027027027027027, + "grad_norm": 1.1341768503189087, + "learning_rate": 2.7337728518420143e-05, + "loss": 0.7493, + "step": 325 + }, + { + "epoch": 0.7135135135135136, + "grad_norm": 1.397209644317627, + "learning_rate": 2.723525866660889e-05, + "loss": 0.7303, + "step": 330 + }, + { + "epoch": 0.7243243243243244, + "grad_norm": 1.1951305866241455, + "learning_rate": 2.713105358025104e-05, + "loss": 0.7071, + "step": 335 + }, + { + "epoch": 0.7351351351351352, + "grad_norm": 1.187578797340393, + "learning_rate": 2.7025128037968824e-05, + "loss": 0.66, + "step": 340 + }, + { + "epoch": 0.745945945945946, + "grad_norm": 1.2162693738937378, + "learning_rate": 2.6917497062383776e-05, + "loss": 0.7046, + "step": 345 + }, + { + "epoch": 0.7567567567567568, + "grad_norm": 1.1686859130859375, + "learning_rate": 2.680817591798619e-05, + "loss": 0.6775, + "step": 350 + }, + { + "epoch": 0.7675675675675676, + "grad_norm": 1.3530421257019043, + "learning_rate": 2.669718010897026e-05, + "loss": 0.6515, + "step": 355 + }, + { + "epoch": 0.7783783783783784, + "grad_norm": 1.2877717018127441, + "learning_rate": 2.6584525377035257e-05, + "loss": 0.633, + "step": 360 + }, + { + "epoch": 0.7891891891891892, + "grad_norm": 1.2025889158248901, + "learning_rate": 2.6470227699152975e-05, + "loss": 0.6563, + "step": 365 + }, + { + "epoch": 0.8, + "grad_norm": 1.236899971961975, + "learning_rate": 2.635430328530186e-05, + "loss": 0.6818, + "step": 370 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 1.1630672216415405, + "learning_rate": 2.6236768576168065e-05, + "loss": 0.6385, + "step": 375 + }, + { + "epoch": 0.8216216216216217, + "grad_norm": 1.5487016439437866, + "learning_rate": 2.611764024081378e-05, + "loss": 0.6651, + "step": 380 + }, + { + "epoch": 0.8324324324324325, + "grad_norm": 1.2410399913787842, + "learning_rate": 2.5996935174313215e-05, + "loss": 0.5957, + "step": 385 + }, + { + "epoch": 0.8432432432432433, + "grad_norm": 1.1955680847167969, + "learning_rate": 2.587467049535645e-05, + "loss": 0.5786, + "step": 390 + }, + { + "epoch": 0.8540540540540541, + "grad_norm": 1.2050449848175049, + "learning_rate": 2.575086354382167e-05, + "loss": 0.5923, + "step": 395 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 1.3376110792160034, + "learning_rate": 2.5625531878315956e-05, + "loss": 0.6241, + "step": 400 + }, + { + "epoch": 0.8756756756756757, + "grad_norm": 1.433042049407959, + "learning_rate": 2.5498693273685074e-05, + "loss": 0.5906, + "step": 405 + }, + { + "epoch": 0.8864864864864865, + "grad_norm": 1.1621979475021362, + "learning_rate": 2.5370365718492615e-05, + "loss": 0.5668, + "step": 410 + }, + { + "epoch": 0.8972972972972973, + "grad_norm": 1.1963423490524292, + "learning_rate": 2.5240567412468816e-05, + "loss": 0.5354, + "step": 415 + }, + { + "epoch": 0.9081081081081082, + "grad_norm": 1.2802984714508057, + "learning_rate": 2.5109316763929414e-05, + "loss": 0.6125, + "step": 420 + }, + { + "epoch": 0.918918918918919, + "grad_norm": 1.1878982782363892, + "learning_rate": 2.497663238716495e-05, + "loss": 0.5889, + "step": 425 + }, + { + "epoch": 0.9297297297297298, + "grad_norm": 1.6462504863739014, + "learning_rate": 2.4842533099800822e-05, + "loss": 0.5493, + "step": 430 + }, + { + "epoch": 0.9405405405405406, + "grad_norm": 1.3050243854522705, + "learning_rate": 2.4707037920128554e-05, + "loss": 0.5801, + "step": 435 + }, + { + "epoch": 0.9513513513513514, + "grad_norm": 1.1411267518997192, + "learning_rate": 2.4570166064408556e-05, + "loss": 0.6013, + "step": 440 + }, + { + "epoch": 0.9621621621621622, + "grad_norm": 1.378894567489624, + "learning_rate": 2.4431936944144818e-05, + "loss": 0.4858, + "step": 445 + }, + { + "epoch": 0.972972972972973, + "grad_norm": 1.3321566581726074, + "learning_rate": 2.4292370163331936e-05, + "loss": 0.525, + "step": 450 + }, + { + "epoch": 0.9837837837837838, + "grad_norm": 1.2378548383712769, + "learning_rate": 2.4151485515674837e-05, + "loss": 0.5759, + "step": 455 + }, + { + "epoch": 0.9945945945945946, + "grad_norm": 1.3089052438735962, + "learning_rate": 2.400930298178155e-05, + "loss": 0.5343, + "step": 460 + }, + { + "epoch": 1.0043243243243243, + "grad_norm": 1.2367912530899048, + "learning_rate": 2.3865842726329556e-05, + "loss": 0.5467, + "step": 465 + }, + { + "epoch": 1.0151351351351352, + "grad_norm": 1.1718990802764893, + "learning_rate": 2.3721125095205935e-05, + "loss": 0.4462, + "step": 470 + }, + { + "epoch": 1.025945945945946, + "grad_norm": 1.3019416332244873, + "learning_rate": 2.357517061262189e-05, + "loss": 0.4097, + "step": 475 + }, + { + "epoch": 1.0367567567567568, + "grad_norm": 1.4085646867752075, + "learning_rate": 2.3427999978201956e-05, + "loss": 0.5282, + "step": 480 + }, + { + "epoch": 1.0475675675675675, + "grad_norm": 1.1572306156158447, + "learning_rate": 2.3279634064048308e-05, + "loss": 0.4134, + "step": 485 + }, + { + "epoch": 1.0583783783783784, + "grad_norm": 1.244713306427002, + "learning_rate": 2.3130093911780642e-05, + "loss": 0.4335, + "step": 490 + }, + { + "epoch": 1.0691891891891891, + "grad_norm": 1.173665165901184, + "learning_rate": 2.2979400729552014e-05, + "loss": 0.444, + "step": 495 + }, + { + "epoch": 1.08, + "grad_norm": 1.407321810722351, + "learning_rate": 2.2827575889041007e-05, + "loss": 0.4337, + "step": 500 + }, + { + "epoch": 1.0908108108108108, + "grad_norm": 1.3847451210021973, + "learning_rate": 2.267464092242078e-05, + "loss": 0.4213, + "step": 505 + }, + { + "epoch": 1.1016216216216217, + "grad_norm": 1.4645717144012451, + "learning_rate": 2.2520617519305325e-05, + "loss": 0.4171, + "step": 510 + }, + { + "epoch": 1.1124324324324324, + "grad_norm": 1.3133924007415771, + "learning_rate": 2.2365527523673368e-05, + "loss": 0.4461, + "step": 515 + }, + { + "epoch": 1.1232432432432433, + "grad_norm": 1.4021127223968506, + "learning_rate": 2.2209392930770424e-05, + "loss": 0.4047, + "step": 520 + }, + { + "epoch": 1.134054054054054, + "grad_norm": 1.2070517539978027, + "learning_rate": 2.2052235883989355e-05, + "loss": 0.4069, + "step": 525 + }, + { + "epoch": 1.144864864864865, + "grad_norm": 1.3282095193862915, + "learning_rate": 2.189407867172997e-05, + "loss": 0.406, + "step": 530 + }, + { + "epoch": 1.1556756756756756, + "grad_norm": 1.159316062927246, + "learning_rate": 2.1734943724237986e-05, + "loss": 0.4207, + "step": 535 + }, + { + "epoch": 1.1664864864864866, + "grad_norm": 1.5306358337402344, + "learning_rate": 2.157485361042398e-05, + "loss": 0.388, + "step": 540 + }, + { + "epoch": 1.1772972972972973, + "grad_norm": 1.1564973592758179, + "learning_rate": 2.1413831034662536e-05, + "loss": 0.395, + "step": 545 + }, + { + "epoch": 1.1881081081081082, + "grad_norm": 1.439918875694275, + "learning_rate": 2.1251898833572303e-05, + "loss": 0.3825, + "step": 550 + }, + { + "epoch": 1.1989189189189189, + "grad_norm": 1.30429208278656, + "learning_rate": 2.1089079972777238e-05, + "loss": 0.3605, + "step": 555 + }, + { + "epoch": 1.2097297297297298, + "grad_norm": 1.2694649696350098, + "learning_rate": 2.092539754364957e-05, + "loss": 0.3555, + "step": 560 + }, + { + "epoch": 1.2205405405405405, + "grad_norm": 1.301515817642212, + "learning_rate": 2.07608747600349e-05, + "loss": 0.3516, + "step": 565 + }, + { + "epoch": 1.2313513513513514, + "grad_norm": 1.297397255897522, + "learning_rate": 2.0595534954959984e-05, + "loss": 0.3977, + "step": 570 + }, + { + "epoch": 1.2421621621621621, + "grad_norm": 1.3568118810653687, + "learning_rate": 2.0429401577323576e-05, + "loss": 0.3798, + "step": 575 + }, + { + "epoch": 1.252972972972973, + "grad_norm": 1.360713243484497, + "learning_rate": 2.0262498188570855e-05, + "loss": 0.3432, + "step": 580 + }, + { + "epoch": 1.2637837837837838, + "grad_norm": 1.4393284320831299, + "learning_rate": 2.0094848459351853e-05, + "loss": 0.34, + "step": 585 + }, + { + "epoch": 1.2745945945945947, + "grad_norm": 1.3435497283935547, + "learning_rate": 1.992647616616447e-05, + "loss": 0.3989, + "step": 590 + }, + { + "epoch": 1.2854054054054054, + "grad_norm": 1.312248945236206, + "learning_rate": 1.9757405187982397e-05, + "loss": 0.3532, + "step": 595 + }, + { + "epoch": 1.2962162162162163, + "grad_norm": 1.5269815921783447, + "learning_rate": 1.9587659502868546e-05, + "loss": 0.3891, + "step": 600 + }, + { + "epoch": 1.307027027027027, + "grad_norm": 1.4130357503890991, + "learning_rate": 1.9417263184574453e-05, + "loss": 0.3713, + "step": 605 + }, + { + "epoch": 1.3178378378378377, + "grad_norm": 1.3583375215530396, + "learning_rate": 1.9246240399126036e-05, + "loss": 0.3474, + "step": 610 + }, + { + "epoch": 1.3286486486486486, + "grad_norm": 1.3835967779159546, + "learning_rate": 1.907461540139633e-05, + "loss": 0.3497, + "step": 615 + }, + { + "epoch": 1.3394594594594595, + "grad_norm": 1.390000820159912, + "learning_rate": 1.8902412531665613e-05, + "loss": 0.3002, + "step": 620 + }, + { + "epoch": 1.3502702702702702, + "grad_norm": 1.2552826404571533, + "learning_rate": 1.872965621216938e-05, + "loss": 0.2847, + "step": 625 + }, + { + "epoch": 1.361081081081081, + "grad_norm": 1.3595759868621826, + "learning_rate": 1.855637094363474e-05, + "loss": 0.3211, + "step": 630 + }, + { + "epoch": 1.3718918918918919, + "grad_norm": 1.2867708206176758, + "learning_rate": 1.8382581301805676e-05, + "loss": 0.3547, + "step": 635 + }, + { + "epoch": 1.3827027027027028, + "grad_norm": 1.43692946434021, + "learning_rate": 1.8208311933957606e-05, + "loss": 0.3251, + "step": 640 + }, + { + "epoch": 1.3935135135135135, + "grad_norm": 1.396464467048645, + "learning_rate": 1.80335875554019e-05, + "loss": 0.2924, + "step": 645 + }, + { + "epoch": 1.4043243243243242, + "grad_norm": 1.2472106218338013, + "learning_rate": 1.7858432945980645e-05, + "loss": 0.3178, + "step": 650 + }, + { + "epoch": 1.4151351351351351, + "grad_norm": 1.4300161600112915, + "learning_rate": 1.7682872946552352e-05, + "loss": 0.3295, + "step": 655 + }, + { + "epoch": 1.425945945945946, + "grad_norm": 1.3491531610488892, + "learning_rate": 1.750693245546893e-05, + "loss": 0.3242, + "step": 660 + }, + { + "epoch": 1.4367567567567567, + "grad_norm": 1.2698180675506592, + "learning_rate": 1.7330636425044553e-05, + "loss": 0.2917, + "step": 665 + }, + { + "epoch": 1.4475675675675674, + "grad_norm": 1.3012919425964355, + "learning_rate": 1.7154009858016867e-05, + "loss": 0.3006, + "step": 670 + }, + { + "epoch": 1.4583783783783784, + "grad_norm": 1.333957314491272, + "learning_rate": 1.697707780400102e-05, + "loss": 0.2756, + "step": 675 + }, + { + "epoch": 1.4691891891891893, + "grad_norm": 1.436802625656128, + "learning_rate": 1.67998653559371e-05, + "loss": 0.3122, + "step": 680 + }, + { + "epoch": 1.48, + "grad_norm": 1.6837379932403564, + "learning_rate": 1.6622397646531352e-05, + "loss": 0.2826, + "step": 685 + }, + { + "epoch": 1.4908108108108107, + "grad_norm": 1.2060457468032837, + "learning_rate": 1.644469984469182e-05, + "loss": 0.2958, + "step": 690 + }, + { + "epoch": 1.5016216216216216, + "grad_norm": 1.5616896152496338, + "learning_rate": 1.6266797151958815e-05, + "loss": 0.3542, + "step": 695 + }, + { + "epoch": 1.5124324324324325, + "grad_norm": 1.284088134765625, + "learning_rate": 1.6088714798930806e-05, + "loss": 0.3139, + "step": 700 + }, + { + "epoch": 1.5232432432432432, + "grad_norm": 1.1483250856399536, + "learning_rate": 1.59104780416861e-05, + "loss": 0.2875, + "step": 705 + }, + { + "epoch": 1.534054054054054, + "grad_norm": 1.4885141849517822, + "learning_rate": 1.5732112158200995e-05, + "loss": 0.2942, + "step": 710 + }, + { + "epoch": 1.5448648648648649, + "grad_norm": 1.228744626045227, + "learning_rate": 1.5553642444764808e-05, + "loss": 0.2939, + "step": 715 + }, + { + "epoch": 1.5556756756756758, + "grad_norm": 1.2387549877166748, + "learning_rate": 1.5375094212392253e-05, + "loss": 0.2882, + "step": 720 + }, + { + "epoch": 1.5664864864864865, + "grad_norm": 1.2862504720687866, + "learning_rate": 1.5196492783233815e-05, + "loss": 0.2975, + "step": 725 + }, + { + "epoch": 1.5772972972972972, + "grad_norm": 1.3139840364456177, + "learning_rate": 1.501786348698446e-05, + "loss": 0.2802, + "step": 730 + }, + { + "epoch": 1.588108108108108, + "grad_norm": 1.3352205753326416, + "learning_rate": 1.4839231657291343e-05, + "loss": 0.2577, + "step": 735 + }, + { + "epoch": 1.598918918918919, + "grad_norm": 1.234085202217102, + "learning_rate": 1.4660622628160921e-05, + "loss": 0.2855, + "step": 740 + }, + { + "epoch": 1.6097297297297297, + "grad_norm": 1.0972801446914673, + "learning_rate": 1.4482061730366009e-05, + "loss": 0.2314, + "step": 745 + }, + { + "epoch": 1.6205405405405404, + "grad_norm": 1.3880709409713745, + "learning_rate": 1.4303574287853324e-05, + "loss": 0.2443, + "step": 750 + }, + { + "epoch": 1.6313513513513513, + "grad_norm": 1.198792576789856, + "learning_rate": 1.4125185614151967e-05, + "loss": 0.217, + "step": 755 + }, + { + "epoch": 1.6421621621621623, + "grad_norm": 1.3121585845947266, + "learning_rate": 1.3946921008783418e-05, + "loss": 0.2606, + "step": 760 + }, + { + "epoch": 1.652972972972973, + "grad_norm": 1.3656197786331177, + "learning_rate": 1.3768805753673465e-05, + "loss": 0.2372, + "step": 765 + }, + { + "epoch": 1.6637837837837837, + "grad_norm": 1.1282933950424194, + "learning_rate": 1.359086510956668e-05, + "loss": 0.2716, + "step": 770 + }, + { + "epoch": 1.6745945945945946, + "grad_norm": 1.2672842741012573, + "learning_rate": 1.3413124312443874e-05, + "loss": 0.2448, + "step": 775 + }, + { + "epoch": 1.6854054054054055, + "grad_norm": 1.3546538352966309, + "learning_rate": 1.3235608569943059e-05, + "loss": 0.2314, + "step": 780 + }, + { + "epoch": 1.6962162162162162, + "grad_norm": 1.421842336654663, + "learning_rate": 1.3058343057784458e-05, + "loss": 0.2107, + "step": 785 + }, + { + "epoch": 1.707027027027027, + "grad_norm": 1.2018187046051025, + "learning_rate": 1.2881352916199988e-05, + "loss": 0.2513, + "step": 790 + }, + { + "epoch": 1.7178378378378378, + "grad_norm": 1.2349228858947754, + "learning_rate": 1.270466324636786e-05, + "loss": 0.2437, + "step": 795 + }, + { + "epoch": 1.7286486486486488, + "grad_norm": 1.2098289728164673, + "learning_rate": 1.252829910685263e-05, + "loss": 0.2504, + "step": 800 + }, + { + "epoch": 1.7394594594594595, + "grad_norm": 1.5244630575180054, + "learning_rate": 1.2352285510051324e-05, + "loss": 0.2744, + "step": 805 + }, + { + "epoch": 1.7502702702702702, + "grad_norm": 1.1220543384552002, + "learning_rate": 1.2176647418646156e-05, + "loss": 0.27, + "step": 810 + }, + { + "epoch": 1.761081081081081, + "grad_norm": 1.3971467018127441, + "learning_rate": 1.2001409742064245e-05, + "loss": 0.2063, + "step": 815 + }, + { + "epoch": 1.771891891891892, + "grad_norm": 1.285480260848999, + "learning_rate": 1.1826597332944873e-05, + "loss": 0.2465, + "step": 820 + }, + { + "epoch": 1.7827027027027027, + "grad_norm": 1.1862159967422485, + "learning_rate": 1.1652234983614848e-05, + "loss": 0.1919, + "step": 825 + }, + { + "epoch": 1.7935135135135134, + "grad_norm": 1.2218137979507446, + "learning_rate": 1.1478347422572396e-05, + "loss": 0.2234, + "step": 830 + }, + { + "epoch": 1.8043243243243243, + "grad_norm": 1.3611098527908325, + "learning_rate": 1.1304959310980097e-05, + "loss": 0.242, + "step": 835 + }, + { + "epoch": 1.8151351351351352, + "grad_norm": 1.190139889717102, + "learning_rate": 1.113209523916737e-05, + "loss": 0.2234, + "step": 840 + }, + { + "epoch": 1.825945945945946, + "grad_norm": 1.226676344871521, + "learning_rate": 1.0959779723143024e-05, + "loss": 0.2093, + "step": 845 + }, + { + "epoch": 1.8367567567567566, + "grad_norm": 1.5197325944900513, + "learning_rate": 1.0788037201118346e-05, + "loss": 0.1876, + "step": 850 + }, + { + "epoch": 1.8475675675675676, + "grad_norm": 1.2957510948181152, + "learning_rate": 1.0616892030041184e-05, + "loss": 0.1993, + "step": 855 + }, + { + "epoch": 1.8583783783783785, + "grad_norm": 1.3621931076049805, + "learning_rate": 1.0446368482141612e-05, + "loss": 0.2042, + "step": 860 + }, + { + "epoch": 1.8691891891891892, + "grad_norm": 1.3100106716156006, + "learning_rate": 1.027649074148956e-05, + "loss": 0.2088, + "step": 865 + }, + { + "epoch": 1.88, + "grad_norm": 1.2206536531448364, + "learning_rate": 1.0107282900565009e-05, + "loss": 0.1825, + "step": 870 + }, + { + "epoch": 1.8908108108108108, + "grad_norm": 1.2510806322097778, + "learning_rate": 9.938768956841077e-06, + "loss": 0.1935, + "step": 875 + }, + { + "epoch": 1.9016216216216217, + "grad_norm": 1.1333528757095337, + "learning_rate": 9.770972809380696e-06, + "loss": 0.2062, + "step": 880 + }, + { + "epoch": 1.9124324324324324, + "grad_norm": 1.1528996229171753, + "learning_rate": 9.603918255447141e-06, + "loss": 0.1802, + "step": 885 + }, + { + "epoch": 1.9232432432432431, + "grad_norm": 1.113772988319397, + "learning_rate": 9.437628987129084e-06, + "loss": 0.1865, + "step": 890 + }, + { + "epoch": 1.934054054054054, + "grad_norm": 1.3337033987045288, + "learning_rate": 9.272128587980498e-06, + "loss": 0.2258, + "step": 895 + }, + { + "epoch": 1.944864864864865, + "grad_norm": 1.5289134979248047, + "learning_rate": 9.107440529675971e-06, + "loss": 0.1924, + "step": 900 + }, + { + "epoch": 1.9556756756756757, + "grad_norm": 1.144655704498291, + "learning_rate": 8.943588168681937e-06, + "loss": 0.1913, + "step": 905 + }, + { + "epoch": 1.9664864864864864, + "grad_norm": 1.1703957319259644, + "learning_rate": 8.780594742944159e-06, + "loss": 0.1671, + "step": 910 + }, + { + "epoch": 1.9772972972972973, + "grad_norm": 1.2245736122131348, + "learning_rate": 8.618483368592088e-06, + "loss": 0.1922, + "step": 915 + }, + { + "epoch": 1.9881081081081082, + "grad_norm": 1.141701102256775, + "learning_rate": 8.457277036660463e-06, + "loss": 0.1909, + "step": 920 + }, + { + "epoch": 1.998918918918919, + "grad_norm": 1.3344765901565552, + "learning_rate": 8.296998609828704e-06, + "loss": 0.1666, + "step": 925 + }, + { + "epoch": 2.0086486486486486, + "grad_norm": 1.3570910692214966, + "learning_rate": 8.13767081917841e-06, + "loss": 0.1632, + "step": 930 + }, + { + "epoch": 2.0194594594594593, + "grad_norm": 1.010612964630127, + "learning_rate": 7.97931626096964e-06, + "loss": 0.1403, + "step": 935 + }, + { + "epoch": 2.0302702702702704, + "grad_norm": 1.2923632860183716, + "learning_rate": 7.821957393436202e-06, + "loss": 0.1433, + "step": 940 + }, + { + "epoch": 2.041081081081081, + "grad_norm": 1.1319085359573364, + "learning_rate": 7.665616533600628e-06, + "loss": 0.15, + "step": 945 + }, + { + "epoch": 2.051891891891892, + "grad_norm": 1.172631025314331, + "learning_rate": 7.5103158541090665e-06, + "loss": 0.1367, + "step": 950 + }, + { + "epoch": 2.0627027027027025, + "grad_norm": 1.079500675201416, + "learning_rate": 7.356077380086726e-06, + "loss": 0.1169, + "step": 955 + }, + { + "epoch": 2.0735135135135137, + "grad_norm": 1.3374136686325073, + "learning_rate": 7.202922986014228e-06, + "loss": 0.1247, + "step": 960 + }, + { + "epoch": 2.0843243243243244, + "grad_norm": 1.0975528955459595, + "learning_rate": 7.050874392625302e-06, + "loss": 0.1366, + "step": 965 + }, + { + "epoch": 2.095135135135135, + "grad_norm": 1.2024213075637817, + "learning_rate": 6.899953163826292e-06, + "loss": 0.1312, + "step": 970 + }, + { + "epoch": 2.1059459459459458, + "grad_norm": 1.082950472831726, + "learning_rate": 6.7501807036379125e-06, + "loss": 0.1357, + "step": 975 + }, + { + "epoch": 2.116756756756757, + "grad_norm": 1.2047678232192993, + "learning_rate": 6.601578253159698e-06, + "loss": 0.151, + "step": 980 + }, + { + "epoch": 2.1275675675675676, + "grad_norm": 1.2053114175796509, + "learning_rate": 6.454166887557508e-06, + "loss": 0.1277, + "step": 985 + }, + { + "epoch": 2.1383783783783783, + "grad_norm": 1.0474656820297241, + "learning_rate": 6.307967513074605e-06, + "loss": 0.1319, + "step": 990 + }, + { + "epoch": 2.149189189189189, + "grad_norm": 1.2605841159820557, + "learning_rate": 6.163000864066698e-06, + "loss": 0.1478, + "step": 995 + }, + { + "epoch": 2.16, + "grad_norm": 0.9077703952789307, + "learning_rate": 6.019287500061326e-06, + "loss": 0.1534, + "step": 1000 + }, + { + "epoch": 2.170810810810811, + "grad_norm": 1.0507147312164307, + "learning_rate": 5.876847802842052e-06, + "loss": 0.1381, + "step": 1005 + }, + { + "epoch": 2.1816216216216215, + "grad_norm": 1.0697144269943237, + "learning_rate": 5.73570197355788e-06, + "loss": 0.1324, + "step": 1010 + }, + { + "epoch": 2.1924324324324322, + "grad_norm": 1.0238703489303589, + "learning_rate": 5.595870029858268e-06, + "loss": 0.1278, + "step": 1015 + }, + { + "epoch": 2.2032432432432434, + "grad_norm": 1.0087915658950806, + "learning_rate": 5.4573718030541965e-06, + "loss": 0.1288, + "step": 1020 + }, + { + "epoch": 2.214054054054054, + "grad_norm": 0.9904815554618835, + "learning_rate": 5.320226935305609e-06, + "loss": 0.1173, + "step": 1025 + }, + { + "epoch": 2.224864864864865, + "grad_norm": 1.0402568578720093, + "learning_rate": 5.184454876835746e-06, + "loss": 0.1243, + "step": 1030 + }, + { + "epoch": 2.2356756756756755, + "grad_norm": 1.169760823249817, + "learning_rate": 5.05007488317265e-06, + "loss": 0.1212, + "step": 1035 + }, + { + "epoch": 2.2464864864864866, + "grad_norm": 1.160381555557251, + "learning_rate": 4.917106012418294e-06, + "loss": 0.1327, + "step": 1040 + }, + { + "epoch": 2.2572972972972973, + "grad_norm": 0.8716002106666565, + "learning_rate": 4.7855671225457225e-06, + "loss": 0.1173, + "step": 1045 + }, + { + "epoch": 2.268108108108108, + "grad_norm": 0.8988707065582275, + "learning_rate": 4.655476868724566e-06, + "loss": 0.1179, + "step": 1050 + }, + { + "epoch": 2.2789189189189187, + "grad_norm": 1.1103434562683105, + "learning_rate": 4.526853700675325e-06, + "loss": 0.1284, + "step": 1055 + }, + { + "epoch": 2.28972972972973, + "grad_norm": 1.0457454919815063, + "learning_rate": 4.399715860052781e-06, + "loss": 0.1293, + "step": 1060 + }, + { + "epoch": 2.3005405405405406, + "grad_norm": 0.9258773326873779, + "learning_rate": 4.274081377858909e-06, + "loss": 0.0914, + "step": 1065 + }, + { + "epoch": 2.3113513513513513, + "grad_norm": 1.0390188694000244, + "learning_rate": 4.149968071885682e-06, + "loss": 0.1125, + "step": 1070 + }, + { + "epoch": 2.322162162162162, + "grad_norm": 1.0505141019821167, + "learning_rate": 4.027393544188129e-06, + "loss": 0.122, + "step": 1075 + }, + { + "epoch": 2.332972972972973, + "grad_norm": 1.2161805629730225, + "learning_rate": 3.9063751785879365e-06, + "loss": 0.109, + "step": 1080 + }, + { + "epoch": 2.343783783783784, + "grad_norm": 1.097092628479004, + "learning_rate": 3.786930138208046e-06, + "loss": 0.1149, + "step": 1085 + }, + { + "epoch": 2.3545945945945945, + "grad_norm": 0.751530647277832, + "learning_rate": 3.6690753630385436e-06, + "loss": 0.1093, + "step": 1090 + }, + { + "epoch": 2.3654054054054052, + "grad_norm": 0.966195821762085, + "learning_rate": 3.552827567534186e-06, + "loss": 0.1116, + "step": 1095 + }, + { + "epoch": 2.3762162162162164, + "grad_norm": 1.1819593906402588, + "learning_rate": 3.4382032382439036e-06, + "loss": 0.1167, + "step": 1100 + }, + { + "epoch": 2.387027027027027, + "grad_norm": 0.9405239224433899, + "learning_rate": 3.3252186314726447e-06, + "loss": 0.1174, + "step": 1105 + }, + { + "epoch": 2.3978378378378378, + "grad_norm": 1.1678102016448975, + "learning_rate": 3.213889770975881e-06, + "loss": 0.1196, + "step": 1110 + }, + { + "epoch": 2.4086486486486485, + "grad_norm": 0.9688264727592468, + "learning_rate": 3.1042324456870514e-06, + "loss": 0.119, + "step": 1115 + }, + { + "epoch": 2.4194594594594596, + "grad_norm": 0.8630348443984985, + "learning_rate": 2.996262207478356e-06, + "loss": 0.1265, + "step": 1120 + }, + { + "epoch": 2.4302702702702703, + "grad_norm": 0.9478833079338074, + "learning_rate": 2.8899943689551493e-06, + "loss": 0.122, + "step": 1125 + }, + { + "epoch": 2.441081081081081, + "grad_norm": 0.9283960461616516, + "learning_rate": 2.7854440012842684e-06, + "loss": 0.1087, + "step": 1130 + }, + { + "epoch": 2.4518918918918917, + "grad_norm": 0.8862292766571045, + "learning_rate": 2.6826259320565903e-06, + "loss": 0.1025, + "step": 1135 + }, + { + "epoch": 2.462702702702703, + "grad_norm": 1.147686243057251, + "learning_rate": 2.581554743184158e-06, + "loss": 0.1315, + "step": 1140 + }, + { + "epoch": 2.4735135135135136, + "grad_norm": 0.9898723363876343, + "learning_rate": 2.482244768832146e-06, + "loss": 0.1242, + "step": 1145 + }, + { + "epoch": 2.4843243243243243, + "grad_norm": 1.0704761743545532, + "learning_rate": 2.384710093385929e-06, + "loss": 0.115, + "step": 1150 + }, + { + "epoch": 2.495135135135135, + "grad_norm": 0.9278268814086914, + "learning_rate": 2.288964549453633e-06, + "loss": 0.1071, + "step": 1155 + }, + { + "epoch": 2.505945945945946, + "grad_norm": 0.8467145562171936, + "learning_rate": 2.1950217159043197e-06, + "loss": 0.114, + "step": 1160 + }, + { + "epoch": 2.516756756756757, + "grad_norm": 1.036379337310791, + "learning_rate": 2.102894915942243e-06, + "loss": 0.1097, + "step": 1165 + }, + { + "epoch": 2.5275675675675675, + "grad_norm": 0.9519475102424622, + "learning_rate": 2.0125972152172777e-06, + "loss": 0.1218, + "step": 1170 + }, + { + "epoch": 2.538378378378378, + "grad_norm": 0.8463801741600037, + "learning_rate": 1.924141419971949e-06, + "loss": 0.0968, + "step": 1175 + }, + { + "epoch": 2.5491891891891894, + "grad_norm": 0.900498628616333, + "learning_rate": 1.8375400752251943e-06, + "loss": 0.1054, + "step": 1180 + }, + { + "epoch": 2.56, + "grad_norm": 0.8663133382797241, + "learning_rate": 1.75280546299322e-06, + "loss": 0.0968, + "step": 1185 + }, + { + "epoch": 2.5708108108108108, + "grad_norm": 0.8068374395370483, + "learning_rate": 1.6699496005476244e-06, + "loss": 0.099, + "step": 1190 + }, + { + "epoch": 2.581621621621622, + "grad_norm": 0.7136590480804443, + "learning_rate": 1.5889842387110742e-06, + "loss": 0.1047, + "step": 1195 + }, + { + "epoch": 2.5924324324324326, + "grad_norm": 0.948045551776886, + "learning_rate": 1.5099208601907777e-06, + "loss": 0.1039, + "step": 1200 + }, + { + "epoch": 2.6032432432432433, + "grad_norm": 0.7788717150688171, + "learning_rate": 1.4327706779499927e-06, + "loss": 0.0927, + "step": 1205 + }, + { + "epoch": 2.614054054054054, + "grad_norm": 0.8725752830505371, + "learning_rate": 1.3575446336177506e-06, + "loss": 0.099, + "step": 1210 + }, + { + "epoch": 2.6248648648648647, + "grad_norm": 0.8788983225822449, + "learning_rate": 1.2842533959371066e-06, + "loss": 0.116, + "step": 1215 + }, + { + "epoch": 2.6356756756756754, + "grad_norm": 0.6774969100952148, + "learning_rate": 1.2129073592520685e-06, + "loss": 0.1077, + "step": 1220 + }, + { + "epoch": 2.6464864864864865, + "grad_norm": 0.8134023547172546, + "learning_rate": 1.1435166420334436e-06, + "loss": 0.0987, + "step": 1225 + }, + { + "epoch": 2.6572972972972972, + "grad_norm": 0.9484908580780029, + "learning_rate": 1.0760910854438104e-06, + "loss": 0.104, + "step": 1230 + }, + { + "epoch": 2.668108108108108, + "grad_norm": 1.2464313507080078, + "learning_rate": 1.0106402519418173e-06, + "loss": 0.1009, + "step": 1235 + }, + { + "epoch": 2.678918918918919, + "grad_norm": 0.9004675149917603, + "learning_rate": 9.471734239260288e-07, + "loss": 0.1111, + "step": 1240 + }, + { + "epoch": 2.68972972972973, + "grad_norm": 0.7725552916526794, + "learning_rate": 8.856996024184477e-07, + "loss": 0.0994, + "step": 1245 + }, + { + "epoch": 2.7005405405405405, + "grad_norm": 1.1223211288452148, + "learning_rate": 8.262275057879926e-07, + "loss": 0.0939, + "step": 1250 + }, + { + "epoch": 2.711351351351351, + "grad_norm": 0.8283373117446899, + "learning_rate": 7.6876556851401e-07, + "loss": 0.108, + "step": 1255 + }, + { + "epoch": 2.722162162162162, + "grad_norm": 0.9991348385810852, + "learning_rate": 7.133219399901097e-07, + "loss": 0.1004, + "step": 1260 + }, + { + "epoch": 2.732972972972973, + "grad_norm": 0.8724759221076965, + "learning_rate": 6.599044833683632e-07, + "loss": 0.099, + "step": 1265 + }, + { + "epoch": 2.7437837837837837, + "grad_norm": 0.7052216529846191, + "learning_rate": 6.085207744441529e-07, + "loss": 0.1017, + "step": 1270 + }, + { + "epoch": 2.7545945945945944, + "grad_norm": 0.71845543384552, + "learning_rate": 5.591781005817542e-07, + "loss": 0.1031, + "step": 1275 + }, + { + "epoch": 2.7654054054054056, + "grad_norm": 0.7921403050422668, + "learning_rate": 5.11883459680812e-07, + "loss": 0.1163, + "step": 1280 + }, + { + "epoch": 2.7762162162162163, + "grad_norm": 1.055810570716858, + "learning_rate": 4.6664355918389244e-07, + "loss": 0.1208, + "step": 1285 + }, + { + "epoch": 2.787027027027027, + "grad_norm": 0.9086781740188599, + "learning_rate": 4.234648151252063e-07, + "loss": 0.0968, + "step": 1290 + }, + { + "epoch": 2.7978378378378377, + "grad_norm": 0.786642849445343, + "learning_rate": 3.823533512206845e-07, + "loss": 0.0949, + "step": 1295 + }, + { + "epoch": 2.8086486486486484, + "grad_norm": 0.7988888621330261, + "learning_rate": 3.4331499799948484e-07, + "loss": 0.0983, + "step": 1300 + }, + { + "epoch": 2.8194594594594595, + "grad_norm": 0.8628491759300232, + "learning_rate": 3.063552919770984e-07, + "loss": 0.1036, + "step": 1305 + }, + { + "epoch": 2.8302702702702702, + "grad_norm": 0.9562894701957703, + "learning_rate": 2.7147947487014434e-07, + "loss": 0.1116, + "step": 1310 + }, + { + "epoch": 2.841081081081081, + "grad_norm": 0.8303439617156982, + "learning_rate": 2.38692492852986e-07, + "loss": 0.0981, + "step": 1315 + }, + { + "epoch": 2.851891891891892, + "grad_norm": 0.7052004933357239, + "learning_rate": 2.0799899585623894e-07, + "loss": 0.1065, + "step": 1320 + }, + { + "epoch": 2.8627027027027028, + "grad_norm": 0.8546080589294434, + "learning_rate": 1.7940333690731004e-07, + "loss": 0.1014, + "step": 1325 + }, + { + "epoch": 2.8735135135135135, + "grad_norm": 0.9116050601005554, + "learning_rate": 1.5290957151304795e-07, + "loss": 0.0956, + "step": 1330 + }, + { + "epoch": 2.884324324324324, + "grad_norm": 0.8937197923660278, + "learning_rate": 1.2852145708457498e-07, + "loss": 0.1015, + "step": 1335 + }, + { + "epoch": 2.895135135135135, + "grad_norm": 0.7416398525238037, + "learning_rate": 1.0624245240439811e-07, + "loss": 0.101, + "step": 1340 + }, + { + "epoch": 2.905945945945946, + "grad_norm": 0.8376498222351074, + "learning_rate": 8.607571713588502e-08, + "loss": 0.1156, + "step": 1345 + }, + { + "epoch": 2.9167567567567567, + "grad_norm": 0.795005738735199, + "learning_rate": 6.802411137514897e-08, + "loss": 0.0931, + "step": 1350 + }, + { + "epoch": 2.9275675675675674, + "grad_norm": 0.8672885298728943, + "learning_rate": 5.20901952454167e-08, + "loss": 0.1028, + "step": 1355 + }, + { + "epoch": 2.9383783783783786, + "grad_norm": 0.8281853795051575, + "learning_rate": 3.827622853395551e-08, + "loss": 0.1112, + "step": 1360 + }, + { + "epoch": 2.9491891891891893, + "grad_norm": 0.7827093005180359, + "learning_rate": 2.6584170371578008e-08, + "loss": 0.1049, + "step": 1365 + }, + { + "epoch": 2.96, + "grad_norm": 0.9035359025001526, + "learning_rate": 1.701567895479761e-08, + "loss": 0.1101, + "step": 1370 + }, + { + "epoch": 2.9708108108108107, + "grad_norm": 0.763901948928833, + "learning_rate": 9.572111310653387e-09, + "loss": 0.114, + "step": 1375 + }, + { + "epoch": 2.9816216216216214, + "grad_norm": 0.7890019416809082, + "learning_rate": 4.254523104260666e-09, + "loss": 0.0989, + "step": 1380 + }, + { + "epoch": 2.9924324324324325, + "grad_norm": 0.8369008898735046, + "learning_rate": 1.0636684890874548e-09, + "loss": 0.1054, + "step": 1385 + }, + { + "epoch": 3.0, + "step": 1389, + "total_flos": 1.8970279091854377e+18, + "train_loss": 0.46635930327050884, + "train_runtime": 1484.7838, + "train_samples_per_second": 29.893, + "train_steps_per_second": 0.935 + } + ], + "logging_steps": 5, + "max_steps": 1389, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8970279091854377e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a512510b7e3f00dd3a84eeba88e882040a2c0d65 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca2c8fad61cb5da023e117eb207700e10c3f0189451255b0c236d77f226fd46 +size 8273 diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..99b3f2eac192a6158612e271bfb76a5fbf8aadc5 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 1_128_e3_3e-5 + results: [] +--- + + + +# 1_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..98e84314e9c670c863cac21c5248e84baf8c7608 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "o_proj", + "up_proj", + "down_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae0265bafe0f48f4b6c4235a6a68bc4824adfd78 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed4d41ecf7958a8a30a5c6c928360a38dd3d50f62f800bcd8d0ec4147294b27 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbfbc2342b9db8d7cb857380abad1b2135a0eb41 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.508233453999489e+18, + "train_loss": 0.4364819198012456, + "train_runtime": 1183.9617, + "train_samples": 12174, + "train_samples_per_second": 30.847, + "train_steps_per_second": 0.965 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbfbc2342b9db8d7cb857380abad1b2135a0eb41 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.508233453999489e+18, + "train_loss": 0.4364819198012456, + "train_runtime": 1183.9617, + "train_samples": 12174, + "train_samples_per_second": 30.847, + "train_steps_per_second": 0.965 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d6f0f7d6693eb4a2b9db4820d404cf25cda359c4 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1639 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1143, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013140604467805518, + "grad_norm": 0.6000319719314575, + "learning_rate": 2.068965517241379e-06, + "loss": 1.4972, + "step": 5 + }, + { + "epoch": 0.026281208935611037, + "grad_norm": 0.5496951937675476, + "learning_rate": 4.655172413793104e-06, + "loss": 1.5573, + "step": 10 + }, + { + "epoch": 0.03942181340341656, + "grad_norm": 0.5497358441352844, + "learning_rate": 7.241379310344828e-06, + "loss": 1.5677, + "step": 15 + }, + { + "epoch": 0.052562417871222074, + "grad_norm": 0.47274050116539, + "learning_rate": 9.827586206896551e-06, + "loss": 1.5092, + "step": 20 + }, + { + "epoch": 0.0657030223390276, + "grad_norm": 0.5257453918457031, + "learning_rate": 1.2413793103448277e-05, + "loss": 1.5321, + "step": 25 + }, + { + "epoch": 0.07884362680683311, + "grad_norm": 0.49069836735725403, + "learning_rate": 1.5e-05, + "loss": 1.4581, + "step": 30 + }, + { + "epoch": 0.09198423127463863, + "grad_norm": 0.6515851616859436, + "learning_rate": 1.7586206896551724e-05, + "loss": 1.4119, + "step": 35 + }, + { + "epoch": 0.10512483574244415, + "grad_norm": 0.4764103591442108, + "learning_rate": 2.017241379310345e-05, + "loss": 1.4419, + "step": 40 + }, + { + "epoch": 0.11826544021024968, + "grad_norm": 0.49834540486335754, + "learning_rate": 2.275862068965517e-05, + "loss": 1.461, + "step": 45 + }, + { + "epoch": 0.1314060446780552, + "grad_norm": 0.5335782170295715, + "learning_rate": 2.5344827586206897e-05, + "loss": 1.3978, + "step": 50 + }, + { + "epoch": 0.1445466491458607, + "grad_norm": 0.5069740414619446, + "learning_rate": 2.793103448275862e-05, + "loss": 1.3683, + "step": 55 + }, + { + "epoch": 0.15768725361366623, + "grad_norm": 0.5438121557235718, + "learning_rate": 2.999993712163665e-05, + "loss": 1.3693, + "step": 60 + }, + { + "epoch": 0.17082785808147175, + "grad_norm": 0.631446897983551, + "learning_rate": 2.9997736434270605e-05, + "loss": 1.3212, + "step": 65 + }, + { + "epoch": 0.18396846254927726, + "grad_norm": 0.5521685481071472, + "learning_rate": 2.9992392355875752e-05, + "loss": 1.3559, + "step": 70 + }, + { + "epoch": 0.19710906701708278, + "grad_norm": 0.6499108076095581, + "learning_rate": 2.9983906006522986e-05, + "loss": 1.2451, + "step": 75 + }, + { + "epoch": 0.2102496714848883, + "grad_norm": 0.6416671872138977, + "learning_rate": 2.9972279164875014e-05, + "loss": 1.2699, + "step": 80 + }, + { + "epoch": 0.2233902759526938, + "grad_norm": 0.6594803333282471, + "learning_rate": 2.9957514267813553e-05, + "loss": 1.2558, + "step": 85 + }, + { + "epoch": 0.23653088042049936, + "grad_norm": 0.6575009822845459, + "learning_rate": 2.993961440992859e-05, + "loss": 1.2514, + "step": 90 + }, + { + "epoch": 0.24967148488830487, + "grad_norm": 0.6749443411827087, + "learning_rate": 2.9918583342869767e-05, + "loss": 1.1903, + "step": 95 + }, + { + "epoch": 0.2628120893561104, + "grad_norm": 0.7291126251220703, + "learning_rate": 2.989442547456011e-05, + "loss": 1.1976, + "step": 100 + }, + { + "epoch": 0.2759526938239159, + "grad_norm": 0.8062726855278015, + "learning_rate": 2.986714586827211e-05, + "loss": 1.1337, + "step": 105 + }, + { + "epoch": 0.2890932982917214, + "grad_norm": 0.7285217046737671, + "learning_rate": 2.9836750241566558e-05, + "loss": 1.1208, + "step": 110 + }, + { + "epoch": 0.30223390275952694, + "grad_norm": 0.7911546230316162, + "learning_rate": 2.9803244965094165e-05, + "loss": 1.0985, + "step": 115 + }, + { + "epoch": 0.31537450722733246, + "grad_norm": 0.8114403486251831, + "learning_rate": 2.976663706126034e-05, + "loss": 1.082, + "step": 120 + }, + { + "epoch": 0.328515111695138, + "grad_norm": 0.771315336227417, + "learning_rate": 2.972693420275336e-05, + "loss": 1.1007, + "step": 125 + }, + { + "epoch": 0.3416557161629435, + "grad_norm": 0.7282198667526245, + "learning_rate": 2.9684144710936236e-05, + "loss": 1.1099, + "step": 130 + }, + { + "epoch": 0.354796320630749, + "grad_norm": 0.9147740006446838, + "learning_rate": 2.9638277554102636e-05, + "loss": 1.0721, + "step": 135 + }, + { + "epoch": 0.3679369250985545, + "grad_norm": 0.8092377781867981, + "learning_rate": 2.9589342345597218e-05, + "loss": 1.0411, + "step": 140 + }, + { + "epoch": 0.38107752956636004, + "grad_norm": 0.8536360263824463, + "learning_rate": 2.953734934180073e-05, + "loss": 0.9824, + "step": 145 + }, + { + "epoch": 0.39421813403416556, + "grad_norm": 0.9517012238502502, + "learning_rate": 2.9482309439980404e-05, + "loss": 0.9272, + "step": 150 + }, + { + "epoch": 0.4073587385019711, + "grad_norm": 0.829008162021637, + "learning_rate": 2.9424234176005957e-05, + "loss": 1.0328, + "step": 155 + }, + { + "epoch": 0.4204993429697766, + "grad_norm": 0.9455755352973938, + "learning_rate": 2.9363135721931798e-05, + "loss": 0.9775, + "step": 160 + }, + { + "epoch": 0.4336399474375821, + "grad_norm": 0.9267396926879883, + "learning_rate": 2.9299026883445846e-05, + "loss": 0.964, + "step": 165 + }, + { + "epoch": 0.4467805519053876, + "grad_norm": 0.9962680339813232, + "learning_rate": 2.9231921097185604e-05, + "loss": 0.9039, + "step": 170 + }, + { + "epoch": 0.45992115637319314, + "grad_norm": 0.9546060562133789, + "learning_rate": 2.916183242792194e-05, + "loss": 0.9568, + "step": 175 + }, + { + "epoch": 0.4730617608409987, + "grad_norm": 1.0466433763504028, + "learning_rate": 2.9088775565611248e-05, + "loss": 0.8752, + "step": 180 + }, + { + "epoch": 0.48620236530880423, + "grad_norm": 0.9733150005340576, + "learning_rate": 2.901276582231656e-05, + "loss": 0.8709, + "step": 185 + }, + { + "epoch": 0.49934296977660975, + "grad_norm": 1.132362723350525, + "learning_rate": 2.893381912899826e-05, + "loss": 0.8662, + "step": 190 + }, + { + "epoch": 0.5124835742444153, + "grad_norm": 1.0795292854309082, + "learning_rate": 2.8851952032175136e-05, + "loss": 0.8259, + "step": 195 + }, + { + "epoch": 0.5256241787122208, + "grad_norm": 1.2181341648101807, + "learning_rate": 2.8767181690456345e-05, + "loss": 0.8681, + "step": 200 + }, + { + "epoch": 0.5387647831800263, + "grad_norm": 1.0852538347244263, + "learning_rate": 2.867952587094512e-05, + "loss": 0.8088, + "step": 205 + }, + { + "epoch": 0.5519053876478318, + "grad_norm": 0.9409130215644836, + "learning_rate": 2.8589002945514987e-05, + "loss": 0.8163, + "step": 210 + }, + { + "epoch": 0.5650459921156373, + "grad_norm": 1.0253455638885498, + "learning_rate": 2.8495631886959126e-05, + "loss": 0.7571, + "step": 215 + }, + { + "epoch": 0.5781865965834428, + "grad_norm": 1.0371079444885254, + "learning_rate": 2.8399432265013887e-05, + "loss": 0.7087, + "step": 220 + }, + { + "epoch": 0.5913272010512484, + "grad_norm": 1.0996919870376587, + "learning_rate": 2.8300424242257125e-05, + "loss": 0.7642, + "step": 225 + }, + { + "epoch": 0.6044678055190539, + "grad_norm": 1.1715346574783325, + "learning_rate": 2.8198628569882328e-05, + "loss": 0.7629, + "step": 230 + }, + { + "epoch": 0.6176084099868594, + "grad_norm": 1.1301043033599854, + "learning_rate": 2.809406658334933e-05, + "loss": 0.7807, + "step": 235 + }, + { + "epoch": 0.6307490144546649, + "grad_norm": 1.1367380619049072, + "learning_rate": 2.7986760197912594e-05, + "loss": 0.7807, + "step": 240 + }, + { + "epoch": 0.6438896189224704, + "grad_norm": 1.2245783805847168, + "learning_rate": 2.7876731904027994e-05, + "loss": 0.7522, + "step": 245 + }, + { + "epoch": 0.657030223390276, + "grad_norm": 1.1590607166290283, + "learning_rate": 2.7764004762638977e-05, + "loss": 0.669, + "step": 250 + }, + { + "epoch": 0.6701708278580815, + "grad_norm": 1.0896775722503662, + "learning_rate": 2.7648602400343235e-05, + "loss": 0.7041, + "step": 255 + }, + { + "epoch": 0.683311432325887, + "grad_norm": 1.1343854665756226, + "learning_rate": 2.7530549004440757e-05, + "loss": 0.7048, + "step": 260 + }, + { + "epoch": 0.6964520367936925, + "grad_norm": 1.302240252494812, + "learning_rate": 2.7409869317864406e-05, + "loss": 0.6561, + "step": 265 + }, + { + "epoch": 0.709592641261498, + "grad_norm": 1.1845916509628296, + "learning_rate": 2.7286588633994e-05, + "loss": 0.6793, + "step": 270 + }, + { + "epoch": 0.7227332457293035, + "grad_norm": 1.3149752616882324, + "learning_rate": 2.7160732791355076e-05, + "loss": 0.634, + "step": 275 + }, + { + "epoch": 0.735873850197109, + "grad_norm": 1.091576099395752, + "learning_rate": 2.7032328168203327e-05, + "loss": 0.664, + "step": 280 + }, + { + "epoch": 0.7490144546649146, + "grad_norm": 1.1638708114624023, + "learning_rate": 2.6901401676996e-05, + "loss": 0.6405, + "step": 285 + }, + { + "epoch": 0.7621550591327201, + "grad_norm": 1.1364482641220093, + "learning_rate": 2.6767980758751264e-05, + "loss": 0.6211, + "step": 290 + }, + { + "epoch": 0.7752956636005256, + "grad_norm": 1.2141324281692505, + "learning_rate": 2.6632093377296796e-05, + "loss": 0.6661, + "step": 295 + }, + { + "epoch": 0.7884362680683311, + "grad_norm": 1.4334216117858887, + "learning_rate": 2.649376801340887e-05, + "loss": 0.5666, + "step": 300 + }, + { + "epoch": 0.8015768725361366, + "grad_norm": 1.2394732236862183, + "learning_rate": 2.6353033658842996e-05, + "loss": 0.5991, + "step": 305 + }, + { + "epoch": 0.8147174770039421, + "grad_norm": 1.3286678791046143, + "learning_rate": 2.6209919810257514e-05, + "loss": 0.5626, + "step": 310 + }, + { + "epoch": 0.8278580814717477, + "grad_norm": 1.191128134727478, + "learning_rate": 2.606445646303138e-05, + "loss": 0.587, + "step": 315 + }, + { + "epoch": 0.8409986859395532, + "grad_norm": 1.3281301259994507, + "learning_rate": 2.591667410497738e-05, + "loss": 0.5776, + "step": 320 + }, + { + "epoch": 0.8541392904073587, + "grad_norm": 1.1658596992492676, + "learning_rate": 2.5766603709952184e-05, + "loss": 0.5748, + "step": 325 + }, + { + "epoch": 0.8672798948751642, + "grad_norm": 1.0998756885528564, + "learning_rate": 2.561427673136446e-05, + "loss": 0.5435, + "step": 330 + }, + { + "epoch": 0.8804204993429697, + "grad_norm": 1.2171285152435303, + "learning_rate": 2.5459725095582577e-05, + "loss": 0.591, + "step": 335 + }, + { + "epoch": 0.8935611038107752, + "grad_norm": 1.1682820320129395, + "learning_rate": 2.5302981195243083e-05, + "loss": 0.5622, + "step": 340 + }, + { + "epoch": 0.9067017082785808, + "grad_norm": 1.2696325778961182, + "learning_rate": 2.5144077882461516e-05, + "loss": 0.5228, + "step": 345 + }, + { + "epoch": 0.9198423127463863, + "grad_norm": 1.2294542789459229, + "learning_rate": 2.4983048461946893e-05, + "loss": 0.5008, + "step": 350 + }, + { + "epoch": 0.9329829172141918, + "grad_norm": 1.1982229948043823, + "learning_rate": 2.4819926684021342e-05, + "loss": 0.5068, + "step": 355 + }, + { + "epoch": 0.9461235216819974, + "grad_norm": 1.2754900455474854, + "learning_rate": 2.4654746737546328e-05, + "loss": 0.4912, + "step": 360 + }, + { + "epoch": 0.9592641261498029, + "grad_norm": 1.1950724124908447, + "learning_rate": 2.4487543242756993e-05, + "loss": 0.5285, + "step": 365 + }, + { + "epoch": 0.9724047306176085, + "grad_norm": 1.1647367477416992, + "learning_rate": 2.4318351244006055e-05, + "loss": 0.5249, + "step": 370 + }, + { + "epoch": 0.985545335085414, + "grad_norm": 1.2416152954101562, + "learning_rate": 2.4147206202418812e-05, + "loss": 0.4966, + "step": 375 + }, + { + "epoch": 0.9986859395532195, + "grad_norm": 1.1909600496292114, + "learning_rate": 2.3974143988460838e-05, + "loss": 0.4392, + "step": 380 + }, + { + "epoch": 1.0105124835742445, + "grad_norm": 1.1483696699142456, + "learning_rate": 2.3799200874419827e-05, + "loss": 0.4171, + "step": 385 + }, + { + "epoch": 1.02365308804205, + "grad_norm": 1.298833966255188, + "learning_rate": 2.3622413526803273e-05, + "loss": 0.3912, + "step": 390 + }, + { + "epoch": 1.0367936925098555, + "grad_norm": 1.4780399799346924, + "learning_rate": 2.3443818998653464e-05, + "loss": 0.4052, + "step": 395 + }, + { + "epoch": 1.049934296977661, + "grad_norm": 1.4238592386245728, + "learning_rate": 2.3263454721781537e-05, + "loss": 0.4899, + "step": 400 + }, + { + "epoch": 1.0630749014454666, + "grad_norm": 1.241113543510437, + "learning_rate": 2.308135849892208e-05, + "loss": 0.4632, + "step": 405 + }, + { + "epoch": 1.076215505913272, + "grad_norm": 1.2949522733688354, + "learning_rate": 2.2897568495810022e-05, + "loss": 0.3383, + "step": 410 + }, + { + "epoch": 1.0893561103810776, + "grad_norm": 1.3433641195297241, + "learning_rate": 2.271212323318144e-05, + "loss": 0.4163, + "step": 415 + }, + { + "epoch": 1.1024967148488831, + "grad_norm": 1.3279451131820679, + "learning_rate": 2.2525061578699962e-05, + "loss": 0.3814, + "step": 420 + }, + { + "epoch": 1.1156373193166886, + "grad_norm": 1.310359239578247, + "learning_rate": 2.233642273881045e-05, + "loss": 0.3702, + "step": 425 + }, + { + "epoch": 1.1287779237844942, + "grad_norm": 1.6063209772109985, + "learning_rate": 2.2146246250521677e-05, + "loss": 0.3602, + "step": 430 + }, + { + "epoch": 1.1419185282522997, + "grad_norm": 1.2085447311401367, + "learning_rate": 2.1954571973119726e-05, + "loss": 0.3885, + "step": 435 + }, + { + "epoch": 1.1550591327201052, + "grad_norm": 1.2866445779800415, + "learning_rate": 2.1761440079813845e-05, + "loss": 0.3897, + "step": 440 + }, + { + "epoch": 1.1681997371879107, + "grad_norm": 1.2492274045944214, + "learning_rate": 2.1566891049316515e-05, + "loss": 0.372, + "step": 445 + }, + { + "epoch": 1.1813403416557162, + "grad_norm": 1.3012983798980713, + "learning_rate": 2.137096565735943e-05, + "loss": 0.3672, + "step": 450 + }, + { + "epoch": 1.1944809461235217, + "grad_norm": 1.2833282947540283, + "learning_rate": 2.1173704968147327e-05, + "loss": 0.3211, + "step": 455 + }, + { + "epoch": 1.2076215505913273, + "grad_norm": 1.3121607303619385, + "learning_rate": 2.0975150325751262e-05, + "loss": 0.3702, + "step": 460 + }, + { + "epoch": 1.2207621550591328, + "grad_norm": 1.1029564142227173, + "learning_rate": 2.0775343345443267e-05, + "loss": 0.3338, + "step": 465 + }, + { + "epoch": 1.2339027595269383, + "grad_norm": 1.213159441947937, + "learning_rate": 2.057432590497418e-05, + "loss": 0.3315, + "step": 470 + }, + { + "epoch": 1.2470433639947438, + "grad_norm": 1.377474308013916, + "learning_rate": 2.0372140135796407e-05, + "loss": 0.373, + "step": 475 + }, + { + "epoch": 1.2601839684625493, + "grad_norm": 1.1862900257110596, + "learning_rate": 2.01688284142336e-05, + "loss": 0.3438, + "step": 480 + }, + { + "epoch": 1.2733245729303548, + "grad_norm": 1.2109731435775757, + "learning_rate": 1.9964433352598913e-05, + "loss": 0.323, + "step": 485 + }, + { + "epoch": 1.2864651773981604, + "grad_norm": 1.1861048936843872, + "learning_rate": 1.975899779026386e-05, + "loss": 0.3499, + "step": 490 + }, + { + "epoch": 1.2996057818659659, + "grad_norm": 1.3072450160980225, + "learning_rate": 1.955256478467959e-05, + "loss": 0.308, + "step": 495 + }, + { + "epoch": 1.3127463863337714, + "grad_norm": 1.4458948373794556, + "learning_rate": 1.9345177602352386e-05, + "loss": 0.3279, + "step": 500 + }, + { + "epoch": 1.325886990801577, + "grad_norm": 1.1371740102767944, + "learning_rate": 1.9136879709775424e-05, + "loss": 0.3243, + "step": 505 + }, + { + "epoch": 1.3390275952693824, + "grad_norm": 1.408117651939392, + "learning_rate": 1.8927714764318588e-05, + "loss": 0.3347, + "step": 510 + }, + { + "epoch": 1.352168199737188, + "grad_norm": 1.39098060131073, + "learning_rate": 1.871772660507826e-05, + "loss": 0.3161, + "step": 515 + }, + { + "epoch": 1.3653088042049935, + "grad_norm": 1.3169364929199219, + "learning_rate": 1.8506959243689043e-05, + "loss": 0.2942, + "step": 520 + }, + { + "epoch": 1.378449408672799, + "grad_norm": 1.1352839469909668, + "learning_rate": 1.829545685509934e-05, + "loss": 0.3084, + "step": 525 + }, + { + "epoch": 1.3915900131406045, + "grad_norm": 1.2993459701538086, + "learning_rate": 1.8083263768312645e-05, + "loss": 0.3009, + "step": 530 + }, + { + "epoch": 1.40473061760841, + "grad_norm": 1.206346869468689, + "learning_rate": 1.7870424457096593e-05, + "loss": 0.2734, + "step": 535 + }, + { + "epoch": 1.4178712220762155, + "grad_norm": 1.3716164827346802, + "learning_rate": 1.765698353066169e-05, + "loss": 0.2508, + "step": 540 + }, + { + "epoch": 1.431011826544021, + "grad_norm": 1.2523376941680908, + "learning_rate": 1.7442985724311566e-05, + "loss": 0.2903, + "step": 545 + }, + { + "epoch": 1.4441524310118266, + "grad_norm": 1.4354424476623535, + "learning_rate": 1.7228475890066908e-05, + "loss": 0.3164, + "step": 550 + }, + { + "epoch": 1.457293035479632, + "grad_norm": 1.2621711492538452, + "learning_rate": 1.7013498987264832e-05, + "loss": 0.2945, + "step": 555 + }, + { + "epoch": 1.4704336399474376, + "grad_norm": 1.4864765405654907, + "learning_rate": 1.6798100073135865e-05, + "loss": 0.2666, + "step": 560 + }, + { + "epoch": 1.483574244415243, + "grad_norm": 1.1217767000198364, + "learning_rate": 1.6582324293360298e-05, + "loss": 0.2586, + "step": 565 + }, + { + "epoch": 1.4967148488830486, + "grad_norm": 1.280014157295227, + "learning_rate": 1.6366216872606098e-05, + "loss": 0.2636, + "step": 570 + }, + { + "epoch": 1.5098554533508541, + "grad_norm": 1.2222352027893066, + "learning_rate": 1.6149823105050187e-05, + "loss": 0.2545, + "step": 575 + }, + { + "epoch": 1.5229960578186597, + "grad_norm": 1.3205738067626953, + "learning_rate": 1.5933188344885232e-05, + "loss": 0.2863, + "step": 580 + }, + { + "epoch": 1.5361366622864652, + "grad_norm": 1.3537684679031372, + "learning_rate": 1.5716357996813773e-05, + "loss": 0.2505, + "step": 585 + }, + { + "epoch": 1.5492772667542707, + "grad_norm": 1.2179908752441406, + "learning_rate": 1.5499377506531818e-05, + "loss": 0.2522, + "step": 590 + }, + { + "epoch": 1.5624178712220762, + "grad_norm": 1.3018555641174316, + "learning_rate": 1.5282292351203847e-05, + "loss": 0.2543, + "step": 595 + }, + { + "epoch": 1.5755584756898817, + "grad_norm": 1.1383919715881348, + "learning_rate": 1.5065148029931195e-05, + "loss": 0.2768, + "step": 600 + }, + { + "epoch": 1.5886990801576872, + "grad_norm": 1.245634913444519, + "learning_rate": 1.484799005421584e-05, + "loss": 0.2381, + "step": 605 + }, + { + "epoch": 1.6018396846254928, + "grad_norm": 1.0681072473526, + "learning_rate": 1.4630863938421603e-05, + "loss": 0.2664, + "step": 610 + }, + { + "epoch": 1.6149802890932983, + "grad_norm": 1.3179540634155273, + "learning_rate": 1.4413815190234777e-05, + "loss": 0.2588, + "step": 615 + }, + { + "epoch": 1.6281208935611038, + "grad_norm": 1.1153942346572876, + "learning_rate": 1.419688930112607e-05, + "loss": 0.2759, + "step": 620 + }, + { + "epoch": 1.6412614980289093, + "grad_norm": 1.6008579730987549, + "learning_rate": 1.3980131736816048e-05, + "loss": 0.2377, + "step": 625 + }, + { + "epoch": 1.6544021024967148, + "grad_norm": 1.1769739389419556, + "learning_rate": 1.3763587927745898e-05, + "loss": 0.2258, + "step": 630 + }, + { + "epoch": 1.6675427069645203, + "grad_norm": 1.1792428493499756, + "learning_rate": 1.3547303259555625e-05, + "loss": 0.2356, + "step": 635 + }, + { + "epoch": 1.6806833114323259, + "grad_norm": 1.315924048423767, + "learning_rate": 1.3331323063571647e-05, + "loss": 0.2526, + "step": 640 + }, + { + "epoch": 1.6938239159001314, + "grad_norm": 1.220938801765442, + "learning_rate": 1.3115692607305718e-05, + "loss": 0.2136, + "step": 645 + }, + { + "epoch": 1.7069645203679369, + "grad_norm": 1.235229253768921, + "learning_rate": 1.2900457084967302e-05, + "loss": 0.2392, + "step": 650 + }, + { + "epoch": 1.7201051248357424, + "grad_norm": 1.2829138040542603, + "learning_rate": 1.2685661607991238e-05, + "loss": 0.232, + "step": 655 + }, + { + "epoch": 1.733245729303548, + "grad_norm": 1.139499545097351, + "learning_rate": 1.2471351195582811e-05, + "loss": 0.1879, + "step": 660 + }, + { + "epoch": 1.7463863337713534, + "grad_norm": 1.2732905149459839, + "learning_rate": 1.2257570765282127e-05, + "loss": 0.2086, + "step": 665 + }, + { + "epoch": 1.759526938239159, + "grad_norm": 1.077530860900879, + "learning_rate": 1.2044365123549791e-05, + "loss": 0.2101, + "step": 670 + }, + { + "epoch": 1.7726675427069645, + "grad_norm": 1.1747920513153076, + "learning_rate": 1.183177895637589e-05, + "loss": 0.2287, + "step": 675 + }, + { + "epoch": 1.78580814717477, + "grad_norm": 1.350934624671936, + "learning_rate": 1.1619856819914186e-05, + "loss": 0.2231, + "step": 680 + }, + { + "epoch": 1.7989487516425755, + "grad_norm": 1.1574058532714844, + "learning_rate": 1.1408643131143566e-05, + "loss": 0.1953, + "step": 685 + }, + { + "epoch": 1.812089356110381, + "grad_norm": 1.9376565217971802, + "learning_rate": 1.1198182158558638e-05, + "loss": 0.1884, + "step": 690 + }, + { + "epoch": 1.8252299605781865, + "grad_norm": 1.1832637786865234, + "learning_rate": 1.098851801289144e-05, + "loss": 0.2011, + "step": 695 + }, + { + "epoch": 1.838370565045992, + "grad_norm": 1.1152653694152832, + "learning_rate": 1.0779694637866257e-05, + "loss": 0.1729, + "step": 700 + }, + { + "epoch": 1.8515111695137976, + "grad_norm": 1.2615498304367065, + "learning_rate": 1.0571755800989367e-05, + "loss": 0.1862, + "step": 705 + }, + { + "epoch": 1.864651773981603, + "grad_norm": 1.2463351488113403, + "learning_rate": 1.036474508437579e-05, + "loss": 0.1971, + "step": 710 + }, + { + "epoch": 1.8777923784494086, + "grad_norm": 1.2754261493682861, + "learning_rate": 1.0158705875614877e-05, + "loss": 0.1672, + "step": 715 + }, + { + "epoch": 1.8909329829172141, + "grad_norm": 1.2041103839874268, + "learning_rate": 9.953681358676622e-06, + "loss": 0.1718, + "step": 720 + }, + { + "epoch": 1.9040735873850196, + "grad_norm": 1.223314642906189, + "learning_rate": 9.749714504860753e-06, + "loss": 0.173, + "step": 725 + }, + { + "epoch": 1.9172141918528252, + "grad_norm": 1.2831774950027466, + "learning_rate": 9.54684806379026e-06, + "loss": 0.1908, + "step": 730 + }, + { + "epoch": 1.9303547963206307, + "grad_norm": 1.1934982538223267, + "learning_rate": 9.345124554451506e-06, + "loss": 0.1715, + "step": 735 + }, + { + "epoch": 1.9434954007884362, + "grad_norm": 1.3165861368179321, + "learning_rate": 9.144586256282619e-06, + "loss": 0.1691, + "step": 740 + }, + { + "epoch": 1.9566360052562417, + "grad_norm": 1.2910155057907104, + "learning_rate": 8.945275200312085e-06, + "loss": 0.1665, + "step": 745 + }, + { + "epoch": 1.9697766097240472, + "grad_norm": 1.0195131301879883, + "learning_rate": 8.747233160349434e-06, + "loss": 0.2066, + "step": 750 + }, + { + "epoch": 1.9829172141918527, + "grad_norm": 1.1912403106689453, + "learning_rate": 8.550501644229846e-06, + "loss": 0.1763, + "step": 755 + }, + { + "epoch": 1.9960578186596583, + "grad_norm": 1.1489989757537842, + "learning_rate": 8.355121885114439e-06, + "loss": 0.1569, + "step": 760 + }, + { + "epoch": 2.0078843626806835, + "grad_norm": 0.9988665580749512, + "learning_rate": 8.161134832848186e-06, + "loss": 0.1521, + "step": 765 + }, + { + "epoch": 2.021024967148489, + "grad_norm": 1.0327770709991455, + "learning_rate": 7.968581145377205e-06, + "loss": 0.1561, + "step": 770 + }, + { + "epoch": 2.0341655716162945, + "grad_norm": 0.9528421759605408, + "learning_rate": 7.777501180227199e-06, + "loss": 0.1392, + "step": 775 + }, + { + "epoch": 2.0473061760841, + "grad_norm": 1.0644992589950562, + "learning_rate": 7.587934986044916e-06, + "loss": 0.137, + "step": 780 + }, + { + "epoch": 2.0604467805519056, + "grad_norm": 1.3147528171539307, + "learning_rate": 7.3999222942042635e-06, + "loss": 0.141, + "step": 785 + }, + { + "epoch": 2.073587385019711, + "grad_norm": 1.1737008094787598, + "learning_rate": 7.213502510478993e-06, + "loss": 0.1148, + "step": 790 + }, + { + "epoch": 2.0867279894875166, + "grad_norm": 0.9637901186943054, + "learning_rate": 7.028714706783626e-06, + "loss": 0.1268, + "step": 795 + }, + { + "epoch": 2.099868593955322, + "grad_norm": 0.9228414297103882, + "learning_rate": 6.845597612984288e-06, + "loss": 0.1322, + "step": 800 + }, + { + "epoch": 2.1130091984231276, + "grad_norm": 0.8965508341789246, + "learning_rate": 6.664189608781295e-06, + "loss": 0.1472, + "step": 805 + }, + { + "epoch": 2.126149802890933, + "grad_norm": 1.1861528158187866, + "learning_rate": 6.484528715665131e-06, + "loss": 0.13, + "step": 810 + }, + { + "epoch": 2.1392904073587387, + "grad_norm": 0.9378185272216797, + "learning_rate": 6.306652588947454e-06, + "loss": 0.1244, + "step": 815 + }, + { + "epoch": 2.152431011826544, + "grad_norm": 1.1733484268188477, + "learning_rate": 6.130598509868895e-06, + "loss": 0.1194, + "step": 820 + }, + { + "epoch": 2.1655716162943497, + "grad_norm": 0.8082630038261414, + "learning_rate": 5.95640337778525e-06, + "loss": 0.1337, + "step": 825 + }, + { + "epoch": 2.178712220762155, + "grad_norm": 0.8850849866867065, + "learning_rate": 5.784103702433685e-06, + "loss": 0.1245, + "step": 830 + }, + { + "epoch": 2.1918528252299607, + "grad_norm": 1.1383304595947266, + "learning_rate": 5.613735596280661e-06, + "loss": 0.1305, + "step": 835 + }, + { + "epoch": 2.2049934296977662, + "grad_norm": 1.2028309106826782, + "learning_rate": 5.445334766953037e-06, + "loss": 0.1279, + "step": 840 + }, + { + "epoch": 2.2181340341655718, + "grad_norm": 0.9609809517860413, + "learning_rate": 5.278936509754112e-06, + "loss": 0.1321, + "step": 845 + }, + { + "epoch": 2.2312746386333773, + "grad_norm": 1.1674765348434448, + "learning_rate": 5.114575700266024e-06, + "loss": 0.117, + "step": 850 + }, + { + "epoch": 2.244415243101183, + "grad_norm": 0.9462134838104248, + "learning_rate": 4.95228678704014e-06, + "loss": 0.1091, + "step": 855 + }, + { + "epoch": 2.2575558475689883, + "grad_norm": 0.9706594944000244, + "learning_rate": 4.7921037843769614e-06, + "loss": 0.1166, + "step": 860 + }, + { + "epoch": 2.270696452036794, + "grad_norm": 1.1274670362472534, + "learning_rate": 4.6340602651970304e-06, + "loss": 0.1172, + "step": 865 + }, + { + "epoch": 2.2838370565045993, + "grad_norm": 1.4383842945098877, + "learning_rate": 4.478189354004334e-06, + "loss": 0.1325, + "step": 870 + }, + { + "epoch": 2.296977660972405, + "grad_norm": 0.9975386261940002, + "learning_rate": 4.324523719943716e-06, + "loss": 0.1352, + "step": 875 + }, + { + "epoch": 2.3101182654402104, + "grad_norm": 1.0504313707351685, + "learning_rate": 4.173095569953708e-06, + "loss": 0.103, + "step": 880 + }, + { + "epoch": 2.323258869908016, + "grad_norm": 1.1191004514694214, + "learning_rate": 4.023936642016266e-06, + "loss": 0.1276, + "step": 885 + }, + { + "epoch": 2.3363994743758214, + "grad_norm": 0.8486424088478088, + "learning_rate": 3.87707819850474e-06, + "loss": 0.118, + "step": 890 + }, + { + "epoch": 2.349540078843627, + "grad_norm": 1.0684998035430908, + "learning_rate": 3.7325510196315964e-06, + "loss": 0.1137, + "step": 895 + }, + { + "epoch": 2.3626806833114324, + "grad_norm": 0.7870803475379944, + "learning_rate": 3.5903853969971335e-06, + "loss": 0.089, + "step": 900 + }, + { + "epoch": 2.375821287779238, + "grad_norm": 1.016579270362854, + "learning_rate": 3.450611127240646e-06, + "loss": 0.1046, + "step": 905 + }, + { + "epoch": 2.3889618922470435, + "grad_norm": 1.0224157571792603, + "learning_rate": 3.313257505795317e-06, + "loss": 0.1013, + "step": 910 + }, + { + "epoch": 2.402102496714849, + "grad_norm": 0.7343006730079651, + "learning_rate": 3.1783533207481537e-06, + "loss": 0.0943, + "step": 915 + }, + { + "epoch": 2.4152431011826545, + "grad_norm": 1.0326985120773315, + "learning_rate": 3.045926846806277e-06, + "loss": 0.1095, + "step": 920 + }, + { + "epoch": 2.42838370565046, + "grad_norm": 0.957417368888855, + "learning_rate": 2.9160058393707656e-06, + "loss": 0.1114, + "step": 925 + }, + { + "epoch": 2.4415243101182655, + "grad_norm": 1.1126303672790527, + "learning_rate": 2.7886175287194142e-06, + "loss": 0.1006, + "step": 930 + }, + { + "epoch": 2.454664914586071, + "grad_norm": 0.755524218082428, + "learning_rate": 2.6637886142994725e-06, + "loss": 0.0962, + "step": 935 + }, + { + "epoch": 2.4678055190538766, + "grad_norm": 0.8578504323959351, + "learning_rate": 2.5415452591317023e-06, + "loss": 0.1128, + "step": 940 + }, + { + "epoch": 2.480946123521682, + "grad_norm": 0.9237407445907593, + "learning_rate": 2.4219130843268362e-06, + "loss": 0.1326, + "step": 945 + }, + { + "epoch": 2.4940867279894876, + "grad_norm": 0.7806246876716614, + "learning_rate": 2.304917163715636e-06, + "loss": 0.1042, + "step": 950 + }, + { + "epoch": 2.507227332457293, + "grad_norm": 0.7509581446647644, + "learning_rate": 2.1905820185936172e-06, + "loss": 0.0879, + "step": 955 + }, + { + "epoch": 2.5203679369250986, + "grad_norm": 1.1201022863388062, + "learning_rate": 2.0789316125816275e-06, + "loss": 0.1126, + "step": 960 + }, + { + "epoch": 2.533508541392904, + "grad_norm": 0.8480803966522217, + "learning_rate": 1.9699893466032733e-06, + "loss": 0.101, + "step": 965 + }, + { + "epoch": 2.5466491458607097, + "grad_norm": 0.7627381086349487, + "learning_rate": 1.8637780539803118e-06, + "loss": 0.1037, + "step": 970 + }, + { + "epoch": 2.559789750328515, + "grad_norm": 1.0410747528076172, + "learning_rate": 1.760319995646968e-06, + "loss": 0.0976, + "step": 975 + }, + { + "epoch": 2.5729303547963207, + "grad_norm": 0.8388499021530151, + "learning_rate": 1.6596368554842673e-06, + "loss": 0.109, + "step": 980 + }, + { + "epoch": 2.5860709592641262, + "grad_norm": 0.7287879586219788, + "learning_rate": 1.5617497357752724e-06, + "loss": 0.0952, + "step": 985 + }, + { + "epoch": 2.5992115637319317, + "grad_norm": 0.8424334526062012, + "learning_rate": 1.4666791527822377e-06, + "loss": 0.0934, + "step": 990 + }, + { + "epoch": 2.6123521681997373, + "grad_norm": 0.7682844996452332, + "learning_rate": 1.374445032446588e-06, + "loss": 0.0814, + "step": 995 + }, + { + "epoch": 2.6254927726675428, + "grad_norm": 0.7012391686439514, + "learning_rate": 1.285066706212612e-06, + "loss": 0.1003, + "step": 1000 + }, + { + "epoch": 2.6386333771353483, + "grad_norm": 0.7718296647071838, + "learning_rate": 1.1985629069757847e-06, + "loss": 0.1093, + "step": 1005 + }, + { + "epoch": 2.651773981603154, + "grad_norm": 0.8238117098808289, + "learning_rate": 1.1149517651564944e-06, + "loss": 0.1307, + "step": 1010 + }, + { + "epoch": 2.6649145860709593, + "grad_norm": 0.7402698993682861, + "learning_rate": 1.034250804900081e-06, + "loss": 0.1022, + "step": 1015 + }, + { + "epoch": 2.678055190538765, + "grad_norm": 0.9563916325569153, + "learning_rate": 9.56476940403942e-07, + "loss": 0.1193, + "step": 1020 + }, + { + "epoch": 2.6911957950065704, + "grad_norm": 0.8082496523857117, + "learning_rate": 8.816464723724504e-07, + "loss": 0.0984, + "step": 1025 + }, + { + "epoch": 2.704336399474376, + "grad_norm": 0.810088038444519, + "learning_rate": 8.097750846004909e-07, + "loss": 0.0873, + "step": 1030 + }, + { + "epoch": 2.7174770039421814, + "grad_norm": 0.8148086071014404, + "learning_rate": 7.40877840686257e-07, + "loss": 0.0998, + "step": 1035 + }, + { + "epoch": 2.730617608409987, + "grad_norm": 1.008510708808899, + "learning_rate": 6.749691808740777e-07, + "loss": 0.0973, + "step": 1040 + }, + { + "epoch": 2.7437582128777924, + "grad_norm": 0.5810213088989258, + "learning_rate": 6.120629190278554e-07, + "loss": 0.0787, + "step": 1045 + }, + { + "epoch": 2.756898817345598, + "grad_norm": 0.7217281460762024, + "learning_rate": 5.521722397358132e-07, + "loss": 0.0849, + "step": 1050 + }, + { + "epoch": 2.7700394218134035, + "grad_norm": 0.8091464042663574, + "learning_rate": 4.953096955471142e-07, + "loss": 0.0937, + "step": 1055 + }, + { + "epoch": 2.783180026281209, + "grad_norm": 0.7312224507331848, + "learning_rate": 4.414872043409757e-07, + "loss": 0.0951, + "step": 1060 + }, + { + "epoch": 2.7963206307490145, + "grad_norm": 0.834337592124939, + "learning_rate": 3.907160468287707e-07, + "loss": 0.1011, + "step": 1065 + }, + { + "epoch": 2.80946123521682, + "grad_norm": 0.7655376195907593, + "learning_rate": 3.4300686418970383e-07, + "loss": 0.0961, + "step": 1070 + }, + { + "epoch": 2.8226018396846255, + "grad_norm": 0.7533838152885437, + "learning_rate": 2.9836965584051303e-07, + "loss": 0.0909, + "step": 1075 + }, + { + "epoch": 2.835742444152431, + "grad_norm": 0.8285064101219177, + "learning_rate": 2.568137773396745e-07, + "loss": 0.0974, + "step": 1080 + }, + { + "epoch": 2.8488830486202366, + "grad_norm": 0.8482531309127808, + "learning_rate": 2.183479384265713e-07, + "loss": 0.0947, + "step": 1085 + }, + { + "epoch": 2.862023653088042, + "grad_norm": 0.7816241383552551, + "learning_rate": 1.8298020119600856e-07, + "loss": 0.0947, + "step": 1090 + }, + { + "epoch": 2.8751642575558476, + "grad_norm": 0.6632421612739563, + "learning_rate": 1.5071797840846336e-07, + "loss": 0.0974, + "step": 1095 + }, + { + "epoch": 2.888304862023653, + "grad_norm": 0.7103014588356018, + "learning_rate": 1.215680319364443e-07, + "loss": 0.0868, + "step": 1100 + }, + { + "epoch": 2.9014454664914586, + "grad_norm": 0.7530989646911621, + "learning_rate": 9.553647134726173e-08, + "loss": 0.093, + "step": 1105 + }, + { + "epoch": 2.914586070959264, + "grad_norm": 0.7976528406143188, + "learning_rate": 7.262875262251389e-08, + "loss": 0.0912, + "step": 1110 + }, + { + "epoch": 2.9277266754270697, + "grad_norm": 0.6478866338729858, + "learning_rate": 5.2849677014566445e-08, + "loss": 0.0896, + "step": 1115 + }, + { + "epoch": 2.940867279894875, + "grad_norm": 0.8027013540267944, + "learning_rate": 3.620339004025086e-08, + "loss": 0.0939, + "step": 1120 + }, + { + "epoch": 2.9540078843626807, + "grad_norm": 0.6769530773162842, + "learning_rate": 2.2693380612002767e-08, + "loss": 0.09, + "step": 1125 + }, + { + "epoch": 2.967148488830486, + "grad_norm": 0.7123112082481384, + "learning_rate": 1.2322480306615202e-08, + "loss": 0.0866, + "step": 1130 + }, + { + "epoch": 2.9802890932982917, + "grad_norm": 0.9540034532546997, + "learning_rate": 5.092862771765017e-09, + "loss": 0.1049, + "step": 1135 + }, + { + "epoch": 2.9934296977660972, + "grad_norm": 1.0238860845565796, + "learning_rate": 1.006043270438961e-09, + "loss": 0.1015, + "step": 1140 + }, + { + "epoch": 3.0, + "step": 1143, + "total_flos": 1.508233453999489e+18, + "train_loss": 0.4364819198012456, + "train_runtime": 1183.9617, + "train_samples_per_second": 30.847, + "train_steps_per_second": 0.965 + } + ], + "logging_steps": 5, + "max_steps": 1143, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.508233453999489e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..86abd7e569d48c258b26948d82312fd19e8ace6f --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/1_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fbd0eef003e2f5dd7807d65c62b330bfd1dbcc0887fd028a054614f6c7c2f5d +size 8273 diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a01e73f03c50536a5913df64de2f7f25d3f61a92 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 2_128_e3_3e-5 + results: [] +--- + + + +# 2_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c0aa8076dee874af34e28b40dcd9a01e5f258ca --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "up_proj", + "k_proj", + "gate_proj", + "v_proj", + "down_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c02169cb393b97b2444b370ed860c8f1b0c2c628 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bee9c608d5aa05e65acb2d222a78ab062e4110749e2cc3159ea1b02a9b5b6e87 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3eba90cafb79dcee44e0fab52af20d9302b175e4 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5604276376528486e+18, + "train_loss": 0.42234487697445855, + "train_runtime": 1219.9361, + "train_samples": 13124, + "train_samples_per_second": 32.274, + "train_steps_per_second": 1.011 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3eba90cafb79dcee44e0fab52af20d9302b175e4 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5604276376528486e+18, + "train_loss": 0.42234487697445855, + "train_runtime": 1219.9361, + "train_samples": 13124, + "train_samples_per_second": 32.274, + "train_steps_per_second": 1.011 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8534988031d2f4cd46cced7ac29e264904254806 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1765 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1233, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01218769043266301, + "grad_norm": 0.7004920244216919, + "learning_rate": 1.935483870967742e-06, + "loss": 1.5605, + "step": 5 + }, + { + "epoch": 0.02437538086532602, + "grad_norm": 0.6172131299972534, + "learning_rate": 4.35483870967742e-06, + "loss": 1.5638, + "step": 10 + }, + { + "epoch": 0.03656307129798903, + "grad_norm": 0.5734322667121887, + "learning_rate": 6.774193548387097e-06, + "loss": 1.5208, + "step": 15 + }, + { + "epoch": 0.04875076173065204, + "grad_norm": 0.6661986708641052, + "learning_rate": 9.193548387096775e-06, + "loss": 1.5057, + "step": 20 + }, + { + "epoch": 0.06093845216331505, + "grad_norm": 0.6152777671813965, + "learning_rate": 1.1612903225806451e-05, + "loss": 1.4496, + "step": 25 + }, + { + "epoch": 0.07312614259597806, + "grad_norm": 0.5623178482055664, + "learning_rate": 1.403225806451613e-05, + "loss": 1.5119, + "step": 30 + }, + { + "epoch": 0.08531383302864107, + "grad_norm": 0.5068685412406921, + "learning_rate": 1.6451612903225807e-05, + "loss": 1.406, + "step": 35 + }, + { + "epoch": 0.09750152346130408, + "grad_norm": 0.49543696641921997, + "learning_rate": 1.8870967741935484e-05, + "loss": 1.4775, + "step": 40 + }, + { + "epoch": 0.10968921389396709, + "grad_norm": 0.4573074281215668, + "learning_rate": 2.1290322580645163e-05, + "loss": 1.4615, + "step": 45 + }, + { + "epoch": 0.1218769043266301, + "grad_norm": 0.5216368436813354, + "learning_rate": 2.370967741935484e-05, + "loss": 1.4569, + "step": 50 + }, + { + "epoch": 0.1340645947592931, + "grad_norm": 0.5219162702560425, + "learning_rate": 2.6129032258064516e-05, + "loss": 1.3812, + "step": 55 + }, + { + "epoch": 0.14625228519195613, + "grad_norm": 0.46936264634132385, + "learning_rate": 2.8548387096774196e-05, + "loss": 1.3489, + "step": 60 + }, + { + "epoch": 0.15843997562461914, + "grad_norm": 0.6239922046661377, + "learning_rate": 2.9999784073389242e-05, + "loss": 1.3895, + "step": 65 + }, + { + "epoch": 0.17062766605728213, + "grad_norm": 0.5736918449401855, + "learning_rate": 2.999735497041112e-05, + "loss": 1.4063, + "step": 70 + }, + { + "epoch": 0.18281535648994515, + "grad_norm": 0.6075035929679871, + "learning_rate": 2.9992227294732908e-05, + "loss": 1.307, + "step": 75 + }, + { + "epoch": 0.19500304692260817, + "grad_norm": 0.5670231580734253, + "learning_rate": 2.9984401969011014e-05, + "loss": 1.3389, + "step": 80 + }, + { + "epoch": 0.2071907373552712, + "grad_norm": 0.6823067665100098, + "learning_rate": 2.9973880401307747e-05, + "loss": 1.2552, + "step": 85 + }, + { + "epoch": 0.21937842778793418, + "grad_norm": 0.605379581451416, + "learning_rate": 2.9960664484837977e-05, + "loss": 1.2471, + "step": 90 + }, + { + "epoch": 0.2315661182205972, + "grad_norm": 0.7157903909683228, + "learning_rate": 2.994475659762846e-05, + "loss": 1.243, + "step": 95 + }, + { + "epoch": 0.2437538086532602, + "grad_norm": 0.6897332072257996, + "learning_rate": 2.9926159602089955e-05, + "loss": 1.1968, + "step": 100 + }, + { + "epoch": 0.25594149908592323, + "grad_norm": 0.6656405329704285, + "learning_rate": 2.990487684450215e-05, + "loss": 1.1778, + "step": 105 + }, + { + "epoch": 0.2681291895185862, + "grad_norm": 0.7569514513015747, + "learning_rate": 2.988091215441158e-05, + "loss": 1.1913, + "step": 110 + }, + { + "epoch": 0.28031687995124926, + "grad_norm": 0.8680997490882874, + "learning_rate": 2.985426984394251e-05, + "loss": 1.171, + "step": 115 + }, + { + "epoch": 0.29250457038391225, + "grad_norm": 0.8345737457275391, + "learning_rate": 2.9824954707021058e-05, + "loss": 1.1053, + "step": 120 + }, + { + "epoch": 0.30469226081657524, + "grad_norm": 0.7626073360443115, + "learning_rate": 2.9792972018512597e-05, + "loss": 1.1482, + "step": 125 + }, + { + "epoch": 0.3168799512492383, + "grad_norm": 0.8988826274871826, + "learning_rate": 2.9758327533272567e-05, + "loss": 1.0884, + "step": 130 + }, + { + "epoch": 0.3290676416819013, + "grad_norm": 0.9589856863021851, + "learning_rate": 2.9721027485111025e-05, + "loss": 1.0593, + "step": 135 + }, + { + "epoch": 0.34125533211456427, + "grad_norm": 0.8105655908584595, + "learning_rate": 2.9681078585670912e-05, + "loss": 1.0028, + "step": 140 + }, + { + "epoch": 0.3534430225472273, + "grad_norm": 0.8292884826660156, + "learning_rate": 2.9638488023220403e-05, + "loss": 1.0442, + "step": 145 + }, + { + "epoch": 0.3656307129798903, + "grad_norm": 0.808687686920166, + "learning_rate": 2.9593263461359465e-05, + "loss": 1.0202, + "step": 150 + }, + { + "epoch": 0.3778184034125533, + "grad_norm": 0.8777639269828796, + "learning_rate": 2.9545413037640906e-05, + "loss": 0.9755, + "step": 155 + }, + { + "epoch": 0.39000609384521634, + "grad_norm": 0.9471386671066284, + "learning_rate": 2.9494945362106125e-05, + "loss": 0.9399, + "step": 160 + }, + { + "epoch": 0.40219378427787933, + "grad_norm": 0.9394040703773499, + "learning_rate": 2.9441869515735843e-05, + "loss": 0.9225, + "step": 165 + }, + { + "epoch": 0.4143814747105424, + "grad_norm": 1.0221147537231445, + "learning_rate": 2.938619504881612e-05, + "loss": 0.9708, + "step": 170 + }, + { + "epoch": 0.42656916514320536, + "grad_norm": 0.8763048648834229, + "learning_rate": 2.9327931979219895e-05, + "loss": 0.9198, + "step": 175 + }, + { + "epoch": 0.43875685557586835, + "grad_norm": 0.9301382899284363, + "learning_rate": 2.926709079060441e-05, + "loss": 0.9186, + "step": 180 + }, + { + "epoch": 0.4509445460085314, + "grad_norm": 1.0150519609451294, + "learning_rate": 2.920368243052481e-05, + "loss": 0.9288, + "step": 185 + }, + { + "epoch": 0.4631322364411944, + "grad_norm": 1.1035261154174805, + "learning_rate": 2.9137718308464297e-05, + "loss": 0.8586, + "step": 190 + }, + { + "epoch": 0.4753199268738574, + "grad_norm": 1.1596148014068604, + "learning_rate": 2.9069210293781113e-05, + "loss": 0.8375, + "step": 195 + }, + { + "epoch": 0.4875076173065204, + "grad_norm": 1.0314658880233765, + "learning_rate": 2.8998170713572838e-05, + "loss": 0.8308, + "step": 200 + }, + { + "epoch": 0.4996953077391834, + "grad_norm": 1.0837023258209229, + "learning_rate": 2.8924612350458293e-05, + "loss": 0.889, + "step": 205 + }, + { + "epoch": 0.5118829981718465, + "grad_norm": 1.0469948053359985, + "learning_rate": 2.8848548440277458e-05, + "loss": 0.7923, + "step": 210 + }, + { + "epoch": 0.5240706886045094, + "grad_norm": 1.1175199747085571, + "learning_rate": 2.876999266970987e-05, + "loss": 0.8471, + "step": 215 + }, + { + "epoch": 0.5362583790371724, + "grad_norm": 1.056563138961792, + "learning_rate": 2.8688959173811907e-05, + "loss": 0.7695, + "step": 220 + }, + { + "epoch": 0.5484460694698354, + "grad_norm": 1.1548407077789307, + "learning_rate": 2.860546253347334e-05, + "loss": 0.7662, + "step": 225 + }, + { + "epoch": 0.5606337599024985, + "grad_norm": 1.3764286041259766, + "learning_rate": 2.8519517772793735e-05, + "loss": 0.7508, + "step": 230 + }, + { + "epoch": 0.5728214503351615, + "grad_norm": 1.0955381393432617, + "learning_rate": 2.8431140356379032e-05, + "loss": 0.822, + "step": 235 + }, + { + "epoch": 0.5850091407678245, + "grad_norm": 1.0865767002105713, + "learning_rate": 2.8340346186558928e-05, + "loss": 0.7362, + "step": 240 + }, + { + "epoch": 0.5971968312004875, + "grad_norm": 1.1716761589050293, + "learning_rate": 2.8247151600525454e-05, + "loss": 0.7098, + "step": 245 + }, + { + "epoch": 0.6093845216331505, + "grad_norm": 1.0148050785064697, + "learning_rate": 2.8151573367393293e-05, + "loss": 0.7521, + "step": 250 + }, + { + "epoch": 0.6215722120658135, + "grad_norm": 1.2440930604934692, + "learning_rate": 2.8053628685182446e-05, + "loss": 0.7074, + "step": 255 + }, + { + "epoch": 0.6337599024984766, + "grad_norm": 1.4624284505844116, + "learning_rate": 2.7953335177723655e-05, + "loss": 0.7044, + "step": 260 + }, + { + "epoch": 0.6459475929311396, + "grad_norm": 1.0968961715698242, + "learning_rate": 2.7850710891487227e-05, + "loss": 0.6715, + "step": 265 + }, + { + "epoch": 0.6581352833638026, + "grad_norm": 1.219558596611023, + "learning_rate": 2.774577429233583e-05, + "loss": 0.6701, + "step": 270 + }, + { + "epoch": 0.6703229737964655, + "grad_norm": 1.0738011598587036, + "learning_rate": 2.7638544262201793e-05, + "loss": 0.6223, + "step": 275 + }, + { + "epoch": 0.6825106642291285, + "grad_norm": 1.29618501663208, + "learning_rate": 2.7529040095689573e-05, + "loss": 0.656, + "step": 280 + }, + { + "epoch": 0.6946983546617916, + "grad_norm": 1.6890515089035034, + "learning_rate": 2.741728149660392e-05, + "loss": 0.6152, + "step": 285 + }, + { + "epoch": 0.7068860450944546, + "grad_norm": 1.176666259765625, + "learning_rate": 2.7303288574404484e-05, + "loss": 0.5871, + "step": 290 + }, + { + "epoch": 0.7190737355271176, + "grad_norm": 1.129810094833374, + "learning_rate": 2.7187081840587356e-05, + "loss": 0.6681, + "step": 295 + }, + { + "epoch": 0.7312614259597806, + "grad_norm": 1.2824153900146484, + "learning_rate": 2.7068682204994304e-05, + "loss": 0.6053, + "step": 300 + }, + { + "epoch": 0.7434491163924436, + "grad_norm": 1.281540870666504, + "learning_rate": 2.694811097205034e-05, + "loss": 0.6063, + "step": 305 + }, + { + "epoch": 0.7556368068251066, + "grad_norm": 1.1222610473632812, + "learning_rate": 2.682538983693027e-05, + "loss": 0.6131, + "step": 310 + }, + { + "epoch": 0.7678244972577697, + "grad_norm": 1.2597250938415527, + "learning_rate": 2.670054088165492e-05, + "loss": 0.5828, + "step": 315 + }, + { + "epoch": 0.7800121876904327, + "grad_norm": 1.1869267225265503, + "learning_rate": 2.657358657111781e-05, + "loss": 0.5993, + "step": 320 + }, + { + "epoch": 0.7921998781230957, + "grad_norm": 1.2271625995635986, + "learning_rate": 2.644454974904286e-05, + "loss": 0.5586, + "step": 325 + }, + { + "epoch": 0.8043875685557587, + "grad_norm": 1.1223441362380981, + "learning_rate": 2.6313453633874e-05, + "loss": 0.5743, + "step": 330 + }, + { + "epoch": 0.8165752589884216, + "grad_norm": 1.2040754556655884, + "learning_rate": 2.6180321814597293e-05, + "loss": 0.5783, + "step": 335 + }, + { + "epoch": 0.8287629494210847, + "grad_norm": 1.1933704614639282, + "learning_rate": 2.6045178246496433e-05, + "loss": 0.5783, + "step": 340 + }, + { + "epoch": 0.8409506398537477, + "grad_norm": 1.221615195274353, + "learning_rate": 2.590804724684232e-05, + "loss": 0.5706, + "step": 345 + }, + { + "epoch": 0.8531383302864107, + "grad_norm": 1.1511361598968506, + "learning_rate": 2.576895349051747e-05, + "loss": 0.4898, + "step": 350 + }, + { + "epoch": 0.8653260207190737, + "grad_norm": 1.255771279335022, + "learning_rate": 2.5627922005576115e-05, + "loss": 0.5569, + "step": 355 + }, + { + "epoch": 0.8775137111517367, + "grad_norm": 1.2794138193130493, + "learning_rate": 2.5484978168740744e-05, + "loss": 0.5808, + "step": 360 + }, + { + "epoch": 0.8897014015843998, + "grad_norm": 1.2224044799804688, + "learning_rate": 2.5340147700835898e-05, + "loss": 0.5179, + "step": 365 + }, + { + "epoch": 0.9018890920170628, + "grad_norm": 1.1592283248901367, + "learning_rate": 2.5193456662160043e-05, + "loss": 0.5185, + "step": 370 + }, + { + "epoch": 0.9140767824497258, + "grad_norm": 1.4630870819091797, + "learning_rate": 2.5044931447796388e-05, + "loss": 0.539, + "step": 375 + }, + { + "epoch": 0.9262644728823888, + "grad_norm": 1.2058758735656738, + "learning_rate": 2.4894598782863434e-05, + "loss": 0.4671, + "step": 380 + }, + { + "epoch": 0.9384521633150518, + "grad_norm": 1.2874910831451416, + "learning_rate": 2.4742485717706142e-05, + "loss": 0.5208, + "step": 385 + }, + { + "epoch": 0.9506398537477148, + "grad_norm": 1.1948449611663818, + "learning_rate": 2.4588619623028602e-05, + "loss": 0.4562, + "step": 390 + }, + { + "epoch": 0.9628275441803779, + "grad_norm": 1.3434860706329346, + "learning_rate": 2.443302818496903e-05, + "loss": 0.4802, + "step": 395 + }, + { + "epoch": 0.9750152346130408, + "grad_norm": 1.1934623718261719, + "learning_rate": 2.4275739400118017e-05, + "loss": 0.4721, + "step": 400 + }, + { + "epoch": 0.9872029250457038, + "grad_norm": 1.3956284523010254, + "learning_rate": 2.4116781570480926e-05, + "loss": 0.4728, + "step": 405 + }, + { + "epoch": 0.9993906154783668, + "grad_norm": 1.188158631324768, + "learning_rate": 2.395618329838533e-05, + "loss": 0.4252, + "step": 410 + }, + { + "epoch": 1.0097501523461303, + "grad_norm": 1.258504867553711, + "learning_rate": 2.3793973481334396e-05, + "loss": 0.3663, + "step": 415 + }, + { + "epoch": 1.0219378427787935, + "grad_norm": 1.1329582929611206, + "learning_rate": 2.363018130680717e-05, + "loss": 0.3835, + "step": 420 + }, + { + "epoch": 1.0341255332114565, + "grad_norm": 1.2192707061767578, + "learning_rate": 2.3464836247006684e-05, + "loss": 0.3982, + "step": 425 + }, + { + "epoch": 1.0463132236441195, + "grad_norm": 1.1505147218704224, + "learning_rate": 2.3297968053556838e-05, + "loss": 0.4308, + "step": 430 + }, + { + "epoch": 1.0585009140767825, + "grad_norm": 1.277911901473999, + "learning_rate": 2.3129606752148977e-05, + "loss": 0.3907, + "step": 435 + }, + { + "epoch": 1.0706886045094455, + "grad_norm": 1.2622522115707397, + "learning_rate": 2.2959782637139173e-05, + "loss": 0.3926, + "step": 440 + }, + { + "epoch": 1.0828762949421085, + "grad_norm": 1.22805917263031, + "learning_rate": 2.2788526266097188e-05, + "loss": 0.4028, + "step": 445 + }, + { + "epoch": 1.0950639853747715, + "grad_norm": 1.2299867868423462, + "learning_rate": 2.261586845430801e-05, + "loss": 0.4011, + "step": 450 + }, + { + "epoch": 1.1072516758074344, + "grad_norm": 1.3027052879333496, + "learning_rate": 2.2441840269227093e-05, + "loss": 0.37, + "step": 455 + }, + { + "epoch": 1.1194393662400974, + "grad_norm": 1.1391916275024414, + "learning_rate": 2.2266473024890152e-05, + "loss": 0.4093, + "step": 460 + }, + { + "epoch": 1.1316270566727604, + "grad_norm": 1.3281124830245972, + "learning_rate": 2.2089798276278652e-05, + "loss": 0.3636, + "step": 465 + }, + { + "epoch": 1.1438147471054236, + "grad_norm": 1.293087363243103, + "learning_rate": 2.1911847813641897e-05, + "loss": 0.3401, + "step": 470 + }, + { + "epoch": 1.1560024375380866, + "grad_norm": 1.2027473449707031, + "learning_rate": 2.1732653656776802e-05, + "loss": 0.348, + "step": 475 + }, + { + "epoch": 1.1681901279707496, + "grad_norm": 1.2602580785751343, + "learning_rate": 2.1552248049266365e-05, + "loss": 0.3394, + "step": 480 + }, + { + "epoch": 1.1803778184034126, + "grad_norm": 1.270362138748169, + "learning_rate": 2.1370663452677867e-05, + "loss": 0.3616, + "step": 485 + }, + { + "epoch": 1.1925655088360756, + "grad_norm": 1.1398749351501465, + "learning_rate": 2.118793254072184e-05, + "loss": 0.3446, + "step": 490 + }, + { + "epoch": 1.2047531992687386, + "grad_norm": 1.353935718536377, + "learning_rate": 2.100408819337289e-05, + "loss": 0.3276, + "step": 495 + }, + { + "epoch": 1.2169408897014016, + "grad_norm": 1.2750605344772339, + "learning_rate": 2.0819163490953355e-05, + "loss": 0.3675, + "step": 500 + }, + { + "epoch": 1.2291285801340646, + "grad_norm": 1.271055817604065, + "learning_rate": 2.0633191708180984e-05, + "loss": 0.32, + "step": 505 + }, + { + "epoch": 1.2413162705667276, + "grad_norm": 1.3142497539520264, + "learning_rate": 2.0446206308181575e-05, + "loss": 0.3387, + "step": 510 + }, + { + "epoch": 1.2535039609993905, + "grad_norm": 1.3727620840072632, + "learning_rate": 2.0258240936467732e-05, + "loss": 0.3535, + "step": 515 + }, + { + "epoch": 1.2656916514320535, + "grad_norm": 1.2677583694458008, + "learning_rate": 2.006932941488482e-05, + "loss": 0.3435, + "step": 520 + }, + { + "epoch": 1.2778793418647165, + "grad_norm": 1.234449863433838, + "learning_rate": 1.987950573552517e-05, + "loss": 0.3347, + "step": 525 + }, + { + "epoch": 1.2900670322973795, + "grad_norm": 1.2721590995788574, + "learning_rate": 1.968880405461166e-05, + "loss": 0.2609, + "step": 530 + }, + { + "epoch": 1.3022547227300427, + "grad_norm": 1.2308814525604248, + "learning_rate": 1.9497258686351762e-05, + "loss": 0.2605, + "step": 535 + }, + { + "epoch": 1.3144424131627057, + "grad_norm": 1.1462891101837158, + "learning_rate": 1.930490409676316e-05, + "loss": 0.289, + "step": 540 + }, + { + "epoch": 1.3266301035953687, + "grad_norm": 1.2892414331436157, + "learning_rate": 1.911177489747205e-05, + "loss": 0.3093, + "step": 545 + }, + { + "epoch": 1.3388177940280317, + "grad_norm": 1.3105809688568115, + "learning_rate": 1.8917905839485248e-05, + "loss": 0.2741, + "step": 550 + }, + { + "epoch": 1.3510054844606947, + "grad_norm": 1.374816656112671, + "learning_rate": 1.8723331806937212e-05, + "loss": 0.2697, + "step": 555 + }, + { + "epoch": 1.3631931748933577, + "grad_norm": 1.1749573945999146, + "learning_rate": 1.8528087810813108e-05, + "loss": 0.288, + "step": 560 + }, + { + "epoch": 1.3753808653260207, + "grad_norm": 1.301391363143921, + "learning_rate": 1.833220898264905e-05, + "loss": 0.2872, + "step": 565 + }, + { + "epoch": 1.3875685557586837, + "grad_norm": 1.1195369958877563, + "learning_rate": 1.8135730568210655e-05, + "loss": 0.3068, + "step": 570 + }, + { + "epoch": 1.3997562461913469, + "grad_norm": 1.2597671747207642, + "learning_rate": 1.793868792115105e-05, + "loss": 0.329, + "step": 575 + }, + { + "epoch": 1.4119439366240099, + "grad_norm": 1.2183738946914673, + "learning_rate": 1.7741116496649443e-05, + "loss": 0.2989, + "step": 580 + }, + { + "epoch": 1.4241316270566728, + "grad_norm": 1.1467089653015137, + "learning_rate": 1.754305184503144e-05, + "loss": 0.3132, + "step": 585 + }, + { + "epoch": 1.4363193174893358, + "grad_norm": 1.2103413343429565, + "learning_rate": 1.7344529605372244e-05, + "loss": 0.2882, + "step": 590 + }, + { + "epoch": 1.4485070079219988, + "grad_norm": 1.2793539762496948, + "learning_rate": 1.71455854990839e-05, + "loss": 0.2476, + "step": 595 + }, + { + "epoch": 1.4606946983546618, + "grad_norm": 1.236932396888733, + "learning_rate": 1.6946255323487667e-05, + "loss": 0.2554, + "step": 600 + }, + { + "epoch": 1.4728823887873248, + "grad_norm": 1.2248042821884155, + "learning_rate": 1.674657494537281e-05, + "loss": 0.2385, + "step": 605 + }, + { + "epoch": 1.4850700792199878, + "grad_norm": 1.2836277484893799, + "learning_rate": 1.6546580294542823e-05, + "loss": 0.2556, + "step": 610 + }, + { + "epoch": 1.4972577696526508, + "grad_norm": 1.1010229587554932, + "learning_rate": 1.6346307357350375e-05, + "loss": 0.2154, + "step": 615 + }, + { + "epoch": 1.5094454600853138, + "grad_norm": 1.1976797580718994, + "learning_rate": 1.614579217022201e-05, + "loss": 0.2318, + "step": 620 + }, + { + "epoch": 1.5216331505179768, + "grad_norm": 1.204330325126648, + "learning_rate": 1.594507081317391e-05, + "loss": 0.2264, + "step": 625 + }, + { + "epoch": 1.5338208409506398, + "grad_norm": 1.1244622468948364, + "learning_rate": 1.5744179403319752e-05, + "loss": 0.2744, + "step": 630 + }, + { + "epoch": 1.5460085313833027, + "grad_norm": 1.1601009368896484, + "learning_rate": 1.554315408837195e-05, + "loss": 0.2507, + "step": 635 + }, + { + "epoch": 1.5581962218159657, + "grad_norm": 1.2472013235092163, + "learning_rate": 1.534203104013733e-05, + "loss": 0.2284, + "step": 640 + }, + { + "epoch": 1.5703839122486287, + "grad_norm": 1.0406790971755981, + "learning_rate": 1.5140846448008516e-05, + "loss": 0.2078, + "step": 645 + }, + { + "epoch": 1.582571602681292, + "grad_norm": 1.2787387371063232, + "learning_rate": 1.4939636512452128e-05, + "loss": 0.2203, + "step": 650 + }, + { + "epoch": 1.594759293113955, + "grad_norm": 1.1235847473144531, + "learning_rate": 1.4738437438494997e-05, + "loss": 0.2279, + "step": 655 + }, + { + "epoch": 1.606946983546618, + "grad_norm": 1.3121788501739502, + "learning_rate": 1.4537285429209551e-05, + "loss": 0.223, + "step": 660 + }, + { + "epoch": 1.619134673979281, + "grad_norm": 1.0772984027862549, + "learning_rate": 1.4336216679199563e-05, + "loss": 0.2069, + "step": 665 + }, + { + "epoch": 1.631322364411944, + "grad_norm": 1.4894728660583496, + "learning_rate": 1.4135267368087427e-05, + "loss": 0.1989, + "step": 670 + }, + { + "epoch": 1.643510054844607, + "grad_norm": 1.0883209705352783, + "learning_rate": 1.3934473654004096e-05, + "loss": 0.2236, + "step": 675 + }, + { + "epoch": 1.65569774527727, + "grad_norm": 1.1771758794784546, + "learning_rate": 1.3733871667082928e-05, + "loss": 0.211, + "step": 680 + }, + { + "epoch": 1.667885435709933, + "grad_norm": 1.1947987079620361, + "learning_rate": 1.3533497502958574e-05, + "loss": 0.1868, + "step": 685 + }, + { + "epoch": 1.680073126142596, + "grad_norm": 1.7330470085144043, + "learning_rate": 1.3333387216272e-05, + "loss": 0.2156, + "step": 690 + }, + { + "epoch": 1.692260816575259, + "grad_norm": 1.1736880540847778, + "learning_rate": 1.3133576814182982e-05, + "loss": 0.2341, + "step": 695 + }, + { + "epoch": 1.704448507007922, + "grad_norm": 1.4307278394699097, + "learning_rate": 1.29341022498911e-05, + "loss": 0.2125, + "step": 700 + }, + { + "epoch": 1.716636197440585, + "grad_norm": 1.2069326639175415, + "learning_rate": 1.273499941616642e-05, + "loss": 0.1785, + "step": 705 + }, + { + "epoch": 1.728823887873248, + "grad_norm": 1.284803867340088, + "learning_rate": 1.2536304138891069e-05, + "loss": 0.209, + "step": 710 + }, + { + "epoch": 1.741011578305911, + "grad_norm": 1.317599892616272, + "learning_rate": 1.2338052170612893e-05, + "loss": 0.2303, + "step": 715 + }, + { + "epoch": 1.753199268738574, + "grad_norm": 1.3128317594528198, + "learning_rate": 1.214027918411221e-05, + "loss": 0.1932, + "step": 720 + }, + { + "epoch": 1.765386959171237, + "grad_norm": 1.0282201766967773, + "learning_rate": 1.1943020765983004e-05, + "loss": 0.1761, + "step": 725 + }, + { + "epoch": 1.7775746496039, + "grad_norm": 1.085756778717041, + "learning_rate": 1.1746312410229595e-05, + "loss": 0.1848, + "step": 730 + }, + { + "epoch": 1.789762340036563, + "grad_norm": 1.0350303649902344, + "learning_rate": 1.1550189511879957e-05, + "loss": 0.1826, + "step": 735 + }, + { + "epoch": 1.801950030469226, + "grad_norm": 1.220381259918213, + "learning_rate": 1.1354687360616853e-05, + "loss": 0.1877, + "step": 740 + }, + { + "epoch": 1.814137720901889, + "grad_norm": 1.1854599714279175, + "learning_rate": 1.1159841134427966e-05, + "loss": 0.1623, + "step": 745 + }, + { + "epoch": 1.826325411334552, + "grad_norm": 1.3611849546432495, + "learning_rate": 1.0965685893276043e-05, + "loss": 0.1757, + "step": 750 + }, + { + "epoch": 1.8385131017672152, + "grad_norm": 1.3513563871383667, + "learning_rate": 1.077225657279036e-05, + "loss": 0.1941, + "step": 755 + }, + { + "epoch": 1.8507007921998782, + "grad_norm": 1.106445074081421, + "learning_rate": 1.0579587977980518e-05, + "loss": 0.1642, + "step": 760 + }, + { + "epoch": 1.8628884826325411, + "grad_norm": 1.1038678884506226, + "learning_rate": 1.0387714776973735e-05, + "loss": 0.1822, + "step": 765 + }, + { + "epoch": 1.8750761730652041, + "grad_norm": 1.1920491456985474, + "learning_rate": 1.0196671494776792e-05, + "loss": 0.2088, + "step": 770 + }, + { + "epoch": 1.8872638634978671, + "grad_norm": 1.2086994647979736, + "learning_rate": 1.0006492507063739e-05, + "loss": 0.1711, + "step": 775 + }, + { + "epoch": 1.8994515539305301, + "grad_norm": 1.20108962059021, + "learning_rate": 9.817212033990413e-06, + "loss": 0.1706, + "step": 780 + }, + { + "epoch": 1.9116392443631933, + "grad_norm": 1.1474496126174927, + "learning_rate": 9.62886413403701e-06, + "loss": 0.154, + "step": 785 + }, + { + "epoch": 1.9238269347958563, + "grad_norm": 1.0566107034683228, + "learning_rate": 9.441482697879722e-06, + "loss": 0.1583, + "step": 790 + }, + { + "epoch": 1.9360146252285193, + "grad_norm": 1.3299691677093506, + "learning_rate": 9.255101442292546e-06, + "loss": 0.1673, + "step": 795 + }, + { + "epoch": 1.9482023156611823, + "grad_norm": 1.0805200338363647, + "learning_rate": 9.06975390408041e-06, + "loss": 0.1923, + "step": 800 + }, + { + "epoch": 1.9603900060938453, + "grad_norm": 1.0556820631027222, + "learning_rate": 8.885473434044688e-06, + "loss": 0.1567, + "step": 805 + }, + { + "epoch": 1.9725776965265083, + "grad_norm": 1.1081392765045166, + "learning_rate": 8.702293190982147e-06, + "loss": 0.149, + "step": 810 + }, + { + "epoch": 1.9847653869591713, + "grad_norm": 1.172447681427002, + "learning_rate": 8.520246135718484e-06, + "loss": 0.1603, + "step": 815 + }, + { + "epoch": 1.9969530773918343, + "grad_norm": 0.9945250153541565, + "learning_rate": 8.339365025177473e-06, + "loss": 0.168, + "step": 820 + }, + { + "epoch": 2.0073126142595976, + "grad_norm": 1.2059139013290405, + "learning_rate": 8.15968240648678e-06, + "loss": 0.158, + "step": 825 + }, + { + "epoch": 2.0195003046922606, + "grad_norm": 0.8737114071846008, + "learning_rate": 7.981230611121542e-06, + "loss": 0.1148, + "step": 830 + }, + { + "epoch": 2.031687995124924, + "grad_norm": 1.0554602146148682, + "learning_rate": 7.804041749086772e-06, + "loss": 0.134, + "step": 835 + }, + { + "epoch": 2.043875685557587, + "grad_norm": 1.1360574960708618, + "learning_rate": 7.628147703139593e-06, + "loss": 0.1297, + "step": 840 + }, + { + "epoch": 2.05606337599025, + "grad_norm": 1.2054904699325562, + "learning_rate": 7.4535801230523315e-06, + "loss": 0.1167, + "step": 845 + }, + { + "epoch": 2.068251066422913, + "grad_norm": 0.9711463451385498, + "learning_rate": 7.280370419917604e-06, + "loss": 0.1113, + "step": 850 + }, + { + "epoch": 2.080438756855576, + "grad_norm": 1.004541277885437, + "learning_rate": 7.108549760496305e-06, + "loss": 0.1148, + "step": 855 + }, + { + "epoch": 2.092626447288239, + "grad_norm": 1.2445569038391113, + "learning_rate": 6.9381490616095364e-06, + "loss": 0.1424, + "step": 860 + }, + { + "epoch": 2.104814137720902, + "grad_norm": 1.0174611806869507, + "learning_rate": 6.769198984575583e-06, + "loss": 0.1279, + "step": 865 + }, + { + "epoch": 2.117001828153565, + "grad_norm": 1.1085169315338135, + "learning_rate": 6.601729929692801e-06, + "loss": 0.1271, + "step": 870 + }, + { + "epoch": 2.129189518586228, + "grad_norm": 1.0144529342651367, + "learning_rate": 6.435772030769476e-06, + "loss": 0.1012, + "step": 875 + }, + { + "epoch": 2.141377209018891, + "grad_norm": 0.9043450355529785, + "learning_rate": 6.271355149701678e-06, + "loss": 0.1361, + "step": 880 + }, + { + "epoch": 2.153564899451554, + "grad_norm": 1.0285218954086304, + "learning_rate": 6.108508871100003e-06, + "loss": 0.0992, + "step": 885 + }, + { + "epoch": 2.165752589884217, + "grad_norm": 1.0120166540145874, + "learning_rate": 5.947262496966196e-06, + "loss": 0.1161, + "step": 890 + }, + { + "epoch": 2.17794028031688, + "grad_norm": 0.9688416719436646, + "learning_rate": 5.787645041420706e-06, + "loss": 0.1098, + "step": 895 + }, + { + "epoch": 2.190127970749543, + "grad_norm": 0.8752933144569397, + "learning_rate": 5.62968522548192e-06, + "loss": 0.1131, + "step": 900 + }, + { + "epoch": 2.202315661182206, + "grad_norm": 0.8939966559410095, + "learning_rate": 5.473411471898227e-06, + "loss": 0.11, + "step": 905 + }, + { + "epoch": 2.214503351614869, + "grad_norm": 1.0005139112472534, + "learning_rate": 5.3188519000337615e-06, + "loss": 0.095, + "step": 910 + }, + { + "epoch": 2.226691042047532, + "grad_norm": 1.090046763420105, + "learning_rate": 5.1660343208086405e-06, + "loss": 0.0996, + "step": 915 + }, + { + "epoch": 2.238878732480195, + "grad_norm": 0.8882523775100708, + "learning_rate": 5.014986231694786e-06, + "loss": 0.1054, + "step": 920 + }, + { + "epoch": 2.251066422912858, + "grad_norm": 0.88932204246521, + "learning_rate": 4.8657348117681615e-06, + "loss": 0.1268, + "step": 925 + }, + { + "epoch": 2.263254113345521, + "grad_norm": 0.9554125666618347, + "learning_rate": 4.718306916818195e-06, + "loss": 0.1026, + "step": 930 + }, + { + "epoch": 2.2754418037781843, + "grad_norm": 1.135064721107483, + "learning_rate": 4.572729074515491e-06, + "loss": 0.0978, + "step": 935 + }, + { + "epoch": 2.2876294942108473, + "grad_norm": 0.8762016892433167, + "learning_rate": 4.429027479638517e-06, + "loss": 0.106, + "step": 940 + }, + { + "epoch": 2.2998171846435103, + "grad_norm": 0.9784172177314758, + "learning_rate": 4.287227989360188e-06, + "loss": 0.1142, + "step": 945 + }, + { + "epoch": 2.3120048750761732, + "grad_norm": 0.9356462955474854, + "learning_rate": 4.147356118595249e-06, + "loss": 0.113, + "step": 950 + }, + { + "epoch": 2.3241925655088362, + "grad_norm": 1.0305912494659424, + "learning_rate": 4.009437035409185e-06, + "loss": 0.1103, + "step": 955 + }, + { + "epoch": 2.3363802559414992, + "grad_norm": 0.889022707939148, + "learning_rate": 3.8734955564895535e-06, + "loss": 0.0994, + "step": 960 + }, + { + "epoch": 2.348567946374162, + "grad_norm": 0.9739712476730347, + "learning_rate": 3.739556142680606e-06, + "loss": 0.0983, + "step": 965 + }, + { + "epoch": 2.360755636806825, + "grad_norm": 0.735122799873352, + "learning_rate": 3.607642894581823e-06, + "loss": 0.0889, + "step": 970 + }, + { + "epoch": 2.372943327239488, + "grad_norm": 0.8130188584327698, + "learning_rate": 3.4777795482113693e-06, + "loss": 0.0976, + "step": 975 + }, + { + "epoch": 2.385131017672151, + "grad_norm": 0.9341619610786438, + "learning_rate": 3.349989470735134e-06, + "loss": 0.1085, + "step": 980 + }, + { + "epoch": 2.397318708104814, + "grad_norm": 0.9433596134185791, + "learning_rate": 3.2242956562620755e-06, + "loss": 0.1178, + "step": 985 + }, + { + "epoch": 2.409506398537477, + "grad_norm": 1.079834222793579, + "learning_rate": 3.10072072170677e-06, + "loss": 0.1136, + "step": 990 + }, + { + "epoch": 2.42169408897014, + "grad_norm": 0.8432468771934509, + "learning_rate": 2.979286902719815e-06, + "loss": 0.1193, + "step": 995 + }, + { + "epoch": 2.433881779402803, + "grad_norm": 0.7117247581481934, + "learning_rate": 2.8600160496867796e-06, + "loss": 0.0925, + "step": 1000 + }, + { + "epoch": 2.446069469835466, + "grad_norm": 0.8283855319023132, + "learning_rate": 2.7429296237965595e-06, + "loss": 0.0833, + "step": 1005 + }, + { + "epoch": 2.458257160268129, + "grad_norm": 0.6625029444694519, + "learning_rate": 2.6280486931797137e-06, + "loss": 0.0873, + "step": 1010 + }, + { + "epoch": 2.470444850700792, + "grad_norm": 0.9672327637672424, + "learning_rate": 2.5153939291175152e-06, + "loss": 0.0833, + "step": 1015 + }, + { + "epoch": 2.482632541133455, + "grad_norm": 0.785687267780304, + "learning_rate": 2.4049856023224696e-06, + "loss": 0.0947, + "step": 1020 + }, + { + "epoch": 2.494820231566118, + "grad_norm": 0.763062596321106, + "learning_rate": 2.2968435792908465e-06, + "loss": 0.0913, + "step": 1025 + }, + { + "epoch": 2.507007921998781, + "grad_norm": 0.7724928259849548, + "learning_rate": 2.190987318727968e-06, + "loss": 0.0953, + "step": 1030 + }, + { + "epoch": 2.519195612431444, + "grad_norm": 0.8609941005706787, + "learning_rate": 2.0874358680468953e-06, + "loss": 0.0905, + "step": 1035 + }, + { + "epoch": 2.531383302864107, + "grad_norm": 1.1015088558197021, + "learning_rate": 1.986207859941092e-06, + "loss": 0.1263, + "step": 1040 + }, + { + "epoch": 2.54357099329677, + "grad_norm": 0.7434006929397583, + "learning_rate": 1.8873215090317082e-06, + "loss": 0.0955, + "step": 1045 + }, + { + "epoch": 2.555758683729433, + "grad_norm": 0.7465407252311707, + "learning_rate": 1.7907946085901262e-06, + "loss": 0.1095, + "step": 1050 + }, + { + "epoch": 2.567946374162096, + "grad_norm": 0.7787968516349792, + "learning_rate": 1.6966445273362952e-06, + "loss": 0.0994, + "step": 1055 + }, + { + "epoch": 2.580134064594759, + "grad_norm": 1.629848837852478, + "learning_rate": 1.6048882063134419e-06, + "loss": 0.1022, + "step": 1060 + }, + { + "epoch": 2.5923217550274225, + "grad_norm": 0.7993577718734741, + "learning_rate": 1.5155421558397869e-06, + "loss": 0.0985, + "step": 1065 + }, + { + "epoch": 2.6045094454600854, + "grad_norm": 0.6390942931175232, + "learning_rate": 1.4286224525377174e-06, + "loss": 0.0853, + "step": 1070 + }, + { + "epoch": 2.6166971358927484, + "grad_norm": 0.8979994654655457, + "learning_rate": 1.3441447364410226e-06, + "loss": 0.0956, + "step": 1075 + }, + { + "epoch": 2.6288848263254114, + "grad_norm": 0.8673097491264343, + "learning_rate": 1.2621242081806667e-06, + "loss": 0.1059, + "step": 1080 + }, + { + "epoch": 2.6410725167580744, + "grad_norm": 0.7147300839424133, + "learning_rate": 1.1825756262496556e-06, + "loss": 0.107, + "step": 1085 + }, + { + "epoch": 2.6532602071907374, + "grad_norm": 0.6626409888267517, + "learning_rate": 1.1055133043474385e-06, + "loss": 0.0932, + "step": 1090 + }, + { + "epoch": 2.6654478976234004, + "grad_norm": 0.7927083373069763, + "learning_rate": 1.0309511088043394e-06, + "loss": 0.0841, + "step": 1095 + }, + { + "epoch": 2.6776355880560634, + "grad_norm": 0.6958141922950745, + "learning_rate": 9.589024560865145e-07, + "loss": 0.0852, + "step": 1100 + }, + { + "epoch": 2.6898232784887264, + "grad_norm": 0.906074047088623, + "learning_rate": 8.893803103818304e-07, + "loss": 0.0955, + "step": 1105 + }, + { + "epoch": 2.7020109689213894, + "grad_norm": 0.8387671113014221, + "learning_rate": 8.22397181267125e-07, + "loss": 0.0931, + "step": 1110 + }, + { + "epoch": 2.7141986593540524, + "grad_norm": 0.7803393006324768, + "learning_rate": 7.579651214572941e-07, + "loss": 0.091, + "step": 1115 + }, + { + "epoch": 2.7263863497867153, + "grad_norm": 0.8108364939689636, + "learning_rate": 6.960957246365557e-07, + "loss": 0.0961, + "step": 1120 + }, + { + "epoch": 2.7385740402193783, + "grad_norm": 0.7496523261070251, + "learning_rate": 6.368001233723192e-07, + "loss": 0.0948, + "step": 1125 + }, + { + "epoch": 2.7507617306520413, + "grad_norm": 0.5152583718299866, + "learning_rate": 5.800889871120418e-07, + "loss": 0.0992, + "step": 1130 + }, + { + "epoch": 2.7629494210847043, + "grad_norm": 0.7080441117286682, + "learning_rate": 5.259725202633942e-07, + "loss": 0.0943, + "step": 1135 + }, + { + "epoch": 2.7751371115173673, + "grad_norm": 0.6159129738807678, + "learning_rate": 4.7446046035811373e-07, + "loss": 0.0779, + "step": 1140 + }, + { + "epoch": 2.7873248019500307, + "grad_norm": 0.7342801690101624, + "learning_rate": 4.2556207629988187e-07, + "loss": 0.0827, + "step": 1145 + }, + { + "epoch": 2.7995124923826937, + "grad_norm": 0.7838583588600159, + "learning_rate": 3.792861666964842e-07, + "loss": 0.1113, + "step": 1150 + }, + { + "epoch": 2.8117001828153567, + "grad_norm": 0.7867527008056641, + "learning_rate": 3.3564105827663893e-07, + "loss": 0.1021, + "step": 1155 + }, + { + "epoch": 2.8238878732480197, + "grad_norm": 0.7100556492805481, + "learning_rate": 2.946346043917136e-07, + "loss": 0.089, + "step": 1160 + }, + { + "epoch": 2.8360755636806827, + "grad_norm": 0.8508639335632324, + "learning_rate": 2.5627418360260225e-07, + "loss": 0.0857, + "step": 1165 + }, + { + "epoch": 2.8482632541133457, + "grad_norm": 0.89335697889328, + "learning_rate": 2.2056669835206878e-07, + "loss": 0.0935, + "step": 1170 + }, + { + "epoch": 2.8604509445460087, + "grad_norm": 0.7204089164733887, + "learning_rate": 1.8751857372274416e-07, + "loss": 0.082, + "step": 1175 + }, + { + "epoch": 2.8726386349786717, + "grad_norm": 0.6584092378616333, + "learning_rate": 1.5713575628101274e-07, + "loss": 0.0883, + "step": 1180 + }, + { + "epoch": 2.8848263254113347, + "grad_norm": 0.7702075242996216, + "learning_rate": 1.294237130070064e-07, + "loss": 0.0953, + "step": 1185 + }, + { + "epoch": 2.8970140158439976, + "grad_norm": 0.7975074052810669, + "learning_rate": 1.0438743031090325e-07, + "loss": 0.0866, + "step": 1190 + }, + { + "epoch": 2.9092017062766606, + "grad_norm": 0.718615710735321, + "learning_rate": 8.203141313568363e-08, + "loss": 0.0857, + "step": 1195 + }, + { + "epoch": 2.9213893967093236, + "grad_norm": 0.7818317413330078, + "learning_rate": 6.235968414652848e-08, + "loss": 0.0985, + "step": 1200 + }, + { + "epoch": 2.9335770871419866, + "grad_norm": 0.7728747725486755, + "learning_rate": 4.537578300699008e-08, + "loss": 0.0984, + "step": 1205 + }, + { + "epoch": 2.9457647775746496, + "grad_norm": 0.7189415097236633, + "learning_rate": 3.108276574208646e-08, + "loss": 0.0884, + "step": 1210 + }, + { + "epoch": 2.9579524680073126, + "grad_norm": 0.6835620403289795, + "learning_rate": 1.9483204188402993e-08, + "loss": 0.0786, + "step": 1215 + }, + { + "epoch": 2.9701401584399756, + "grad_norm": 0.7385515570640564, + "learning_rate": 1.0579185531324198e-08, + "loss": 0.0888, + "step": 1220 + }, + { + "epoch": 2.9823278488726386, + "grad_norm": 0.9119024872779846, + "learning_rate": 4.372311929482509e-09, + "loss": 0.1037, + "step": 1225 + }, + { + "epoch": 2.9945155393053016, + "grad_norm": 0.7863675951957703, + "learning_rate": 8.637002264555438e-10, + "loss": 0.094, + "step": 1230 + }, + { + "epoch": 3.0, + "step": 1233, + "total_flos": 1.5604276376528486e+18, + "train_loss": 0.42234487697445855, + "train_runtime": 1219.9361, + "train_samples_per_second": 32.274, + "train_steps_per_second": 1.011 + } + ], + "logging_steps": 5, + "max_steps": 1233, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5604276376528486e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca38d8f6bb9db6ddd72e7fa3760ee3d972c3861a --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/2_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e750692b1270e97bc4b2ec43e0b49d7487d3703995a09ad2fd00a271a6ccaa53 +size 8273 diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1f12bdeb47936ed706d3886eecd0fd48c2066c53 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 3_128_e3_3e-5 + results: [] +--- + + + +# 3_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2940d52e0573fad8d48ee1159468400e7191d4a9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "down_proj", + "k_proj", + "up_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39f5a931f3f6f041be8f4794f4c23a46c6c84f6d --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:856ebd53bce776faebf1a69beda1004f5e1d5ac60d8a79ce4439acd65eee1c67 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0a13f7bb2633a802881f9189d28b9cfb3d29b818 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5090307282417746e+18, + "train_loss": 0.45357518162659466, + "train_runtime": 1191.6902, + "train_samples": 12711, + "train_samples_per_second": 31.999, + "train_steps_per_second": 1.002 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0a13f7bb2633a802881f9189d28b9cfb3d29b818 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5090307282417746e+18, + "train_loss": 0.45357518162659466, + "train_runtime": 1191.6902, + "train_samples": 12711, + "train_samples_per_second": 31.999, + "train_steps_per_second": 1.002 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4f31cae9b56a28c44448f8e70dab2adca91a757a --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1709 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1194, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012586532410320957, + "grad_norm": 0.6403277516365051, + "learning_rate": 2e-06, + "loss": 1.6617, + "step": 5 + }, + { + "epoch": 0.025173064820641914, + "grad_norm": 0.586560845375061, + "learning_rate": 4.5e-06, + "loss": 1.6522, + "step": 10 + }, + { + "epoch": 0.03775959723096287, + "grad_norm": 0.5312709808349609, + "learning_rate": 7e-06, + "loss": 1.677, + "step": 15 + }, + { + "epoch": 0.05034612964128383, + "grad_norm": 0.5567413568496704, + "learning_rate": 9.5e-06, + "loss": 1.5948, + "step": 20 + }, + { + "epoch": 0.06293266205160478, + "grad_norm": 0.49981722235679626, + "learning_rate": 1.2e-05, + "loss": 1.5898, + "step": 25 + }, + { + "epoch": 0.07551919446192575, + "grad_norm": 0.8657897710800171, + "learning_rate": 1.45e-05, + "loss": 1.592, + "step": 30 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 0.48474830389022827, + "learning_rate": 1.7e-05, + "loss": 1.5654, + "step": 35 + }, + { + "epoch": 0.10069225928256766, + "grad_norm": 0.48783090710639954, + "learning_rate": 1.95e-05, + "loss": 1.5368, + "step": 40 + }, + { + "epoch": 0.11327879169288861, + "grad_norm": 0.5199222564697266, + "learning_rate": 2.2e-05, + "loss": 1.4628, + "step": 45 + }, + { + "epoch": 0.12586532410320955, + "grad_norm": 0.500883162021637, + "learning_rate": 2.45e-05, + "loss": 1.5115, + "step": 50 + }, + { + "epoch": 0.13845185651353054, + "grad_norm": 0.5227558612823486, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.5251, + "step": 55 + }, + { + "epoch": 0.1510383889238515, + "grad_norm": 0.6153334975242615, + "learning_rate": 2.95e-05, + "loss": 1.4919, + "step": 60 + }, + { + "epoch": 0.16362492133417245, + "grad_norm": 0.5746011734008789, + "learning_rate": 2.999907901949333e-05, + "loss": 1.4087, + "step": 65 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 0.6191392540931702, + "learning_rate": 2.9995337730012244e-05, + "loss": 1.3894, + "step": 70 + }, + { + "epoch": 0.18879798615481436, + "grad_norm": 0.6862143874168396, + "learning_rate": 2.9988719287563452e-05, + "loss": 1.3584, + "step": 75 + }, + { + "epoch": 0.2013845185651353, + "grad_norm": 0.7607653141021729, + "learning_rate": 2.9979224962026403e-05, + "loss": 1.3675, + "step": 80 + }, + { + "epoch": 0.21397105097545627, + "grad_norm": 0.6901122331619263, + "learning_rate": 2.9966856575075773e-05, + "loss": 1.3376, + "step": 85 + }, + { + "epoch": 0.22655758338577722, + "grad_norm": 0.6906163692474365, + "learning_rate": 2.9951616499831916e-05, + "loss": 1.2886, + "step": 90 + }, + { + "epoch": 0.23914411579609818, + "grad_norm": 0.8337923884391785, + "learning_rate": 2.9933507660405544e-05, + "loss": 1.2569, + "step": 95 + }, + { + "epoch": 0.2517306482064191, + "grad_norm": 0.7352173924446106, + "learning_rate": 2.9912533531336682e-05, + "loss": 1.2391, + "step": 100 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 0.884824275970459, + "learning_rate": 2.9888698136928e-05, + "loss": 1.2529, + "step": 105 + }, + { + "epoch": 0.27690371302706107, + "grad_norm": 0.6890048384666443, + "learning_rate": 2.986200605047268e-05, + "loss": 1.1908, + "step": 110 + }, + { + "epoch": 0.289490245437382, + "grad_norm": 0.7272679209709167, + "learning_rate": 2.9832462393376926e-05, + "loss": 1.1472, + "step": 115 + }, + { + "epoch": 0.302076777847703, + "grad_norm": 0.7295804619789124, + "learning_rate": 2.980007283417734e-05, + "loss": 1.1727, + "step": 120 + }, + { + "epoch": 0.3146633102580239, + "grad_norm": 0.7789854407310486, + "learning_rate": 2.9764843587453284e-05, + "loss": 1.096, + "step": 125 + }, + { + "epoch": 0.3272498426683449, + "grad_norm": 0.8440674543380737, + "learning_rate": 2.9726781412634488e-05, + "loss": 1.1122, + "step": 130 + }, + { + "epoch": 0.3398363750786658, + "grad_norm": 0.7501885890960693, + "learning_rate": 2.9685893612704136e-05, + "loss": 1.0931, + "step": 135 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 0.8683258891105652, + "learning_rate": 2.9642188032797633e-05, + "loss": 1.1192, + "step": 140 + }, + { + "epoch": 0.36500943989930773, + "grad_norm": 1.0507310628890991, + "learning_rate": 2.959567305869736e-05, + "loss": 1.0807, + "step": 145 + }, + { + "epoch": 0.3775959723096287, + "grad_norm": 1.0436062812805176, + "learning_rate": 2.954635761522369e-05, + "loss": 1.0505, + "step": 150 + }, + { + "epoch": 0.39018250471994964, + "grad_norm": 0.8705738186836243, + "learning_rate": 2.949425116452261e-05, + "loss": 0.9977, + "step": 155 + }, + { + "epoch": 0.4027690371302706, + "grad_norm": 1.0106712579727173, + "learning_rate": 2.943936370425018e-05, + "loss": 1.0118, + "step": 160 + }, + { + "epoch": 0.41535556954059155, + "grad_norm": 0.8880999088287354, + "learning_rate": 2.9381705765654322e-05, + "loss": 0.9868, + "step": 165 + }, + { + "epoch": 0.42794210195091253, + "grad_norm": 1.2247734069824219, + "learning_rate": 2.932128841155416e-05, + "loss": 0.9503, + "step": 170 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 1.0579493045806885, + "learning_rate": 2.9258123234217437e-05, + "loss": 0.9284, + "step": 175 + }, + { + "epoch": 0.45311516677155445, + "grad_norm": 1.04769766330719, + "learning_rate": 2.919222235313626e-05, + "loss": 1.003, + "step": 180 + }, + { + "epoch": 0.4657016991818754, + "grad_norm": 1.0481573343276978, + "learning_rate": 2.912359841270177e-05, + "loss": 0.967, + "step": 185 + }, + { + "epoch": 0.47828823159219636, + "grad_norm": 1.049290657043457, + "learning_rate": 2.9052264579778065e-05, + "loss": 0.8928, + "step": 190 + }, + { + "epoch": 0.4908747640025173, + "grad_norm": 1.001838207244873, + "learning_rate": 2.8978234541175855e-05, + "loss": 0.9148, + "step": 195 + }, + { + "epoch": 0.5034612964128382, + "grad_norm": 1.0839954614639282, + "learning_rate": 2.890152250102639e-05, + "loss": 0.9134, + "step": 200 + }, + { + "epoch": 0.5160478288231592, + "grad_norm": 1.07119882106781, + "learning_rate": 2.8822143178056115e-05, + "loss": 1.0062, + "step": 205 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 1.0915242433547974, + "learning_rate": 2.8740111802762587e-05, + "loss": 0.8865, + "step": 210 + }, + { + "epoch": 0.5412208936438011, + "grad_norm": 1.238020896911621, + "learning_rate": 2.86554441144922e-05, + "loss": 0.7977, + "step": 215 + }, + { + "epoch": 0.5538074260541221, + "grad_norm": 1.0616645812988281, + "learning_rate": 2.856815635842029e-05, + "loss": 0.8734, + "step": 220 + }, + { + "epoch": 0.5663939584644431, + "grad_norm": 1.0056395530700684, + "learning_rate": 2.8478265282434157e-05, + "loss": 0.7801, + "step": 225 + }, + { + "epoch": 0.578980490874764, + "grad_norm": 1.135398507118225, + "learning_rate": 2.8385788133919676e-05, + "loss": 0.8187, + "step": 230 + }, + { + "epoch": 0.5915670232850849, + "grad_norm": 1.1715173721313477, + "learning_rate": 2.8290742656452016e-05, + "loss": 0.7508, + "step": 235 + }, + { + "epoch": 0.604153555695406, + "grad_norm": 1.171643614768982, + "learning_rate": 2.8193147086391206e-05, + "loss": 0.8276, + "step": 240 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 1.2682533264160156, + "learning_rate": 2.809302014938312e-05, + "loss": 0.8009, + "step": 245 + }, + { + "epoch": 0.6293266205160478, + "grad_norm": 1.2447189092636108, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.7611, + "step": 250 + }, + { + "epoch": 0.6419131529263687, + "grad_norm": 1.1701849699020386, + "learning_rate": 2.78852495018873e-05, + "loss": 0.8657, + "step": 255 + }, + { + "epoch": 0.6544996853366898, + "grad_norm": 1.2661263942718506, + "learning_rate": 2.7777645656319298e-05, + "loss": 0.6512, + "step": 260 + }, + { + "epoch": 0.6670862177470107, + "grad_norm": 1.0954269170761108, + "learning_rate": 2.7667590165994617e-05, + "loss": 0.7289, + "step": 265 + }, + { + "epoch": 0.6796727501573316, + "grad_norm": 1.3878254890441895, + "learning_rate": 2.7555104147241967e-05, + "loss": 0.7943, + "step": 270 + }, + { + "epoch": 0.6922592825676526, + "grad_norm": 1.634464979171753, + "learning_rate": 2.7440209182735144e-05, + "loss": 0.7055, + "step": 275 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 1.2911537885665894, + "learning_rate": 2.7322927317351962e-05, + "loss": 0.7035, + "step": 280 + }, + { + "epoch": 0.7174323473882945, + "grad_norm": 1.20948326587677, + "learning_rate": 2.7203281053944512e-05, + "loss": 0.6541, + "step": 285 + }, + { + "epoch": 0.7300188797986155, + "grad_norm": 1.240692138671875, + "learning_rate": 2.7081293349021558e-05, + "loss": 0.6787, + "step": 290 + }, + { + "epoch": 0.7426054122089364, + "grad_norm": 1.2488291263580322, + "learning_rate": 2.6956987608343837e-05, + "loss": 0.5883, + "step": 295 + }, + { + "epoch": 0.7551919446192574, + "grad_norm": 1.235083818435669, + "learning_rate": 2.683038768243324e-05, + "loss": 0.6492, + "step": 300 + }, + { + "epoch": 0.7677784770295784, + "grad_norm": 1.1932655572891235, + "learning_rate": 2.670151786199659e-05, + "loss": 0.7245, + "step": 305 + }, + { + "epoch": 0.7803650094398993, + "grad_norm": 1.4202378988265991, + "learning_rate": 2.6570402873264996e-05, + "loss": 0.6325, + "step": 310 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 1.2176381349563599, + "learning_rate": 2.6437067873249648e-05, + "loss": 0.6098, + "step": 315 + }, + { + "epoch": 0.8055380742605412, + "grad_norm": 1.3762211799621582, + "learning_rate": 2.630153844491491e-05, + "loss": 0.673, + "step": 320 + }, + { + "epoch": 0.8181246066708622, + "grad_norm": 1.137423038482666, + "learning_rate": 2.6163840592269775e-05, + "loss": 0.6297, + "step": 325 + }, + { + "epoch": 0.8307111390811831, + "grad_norm": 1.4970413446426392, + "learning_rate": 2.6024000735378423e-05, + "loss": 0.6444, + "step": 330 + }, + { + "epoch": 0.8432976714915041, + "grad_norm": 1.3864353895187378, + "learning_rate": 2.5882045705291054e-05, + "loss": 0.5509, + "step": 335 + }, + { + "epoch": 0.8558842039018251, + "grad_norm": 1.2714463472366333, + "learning_rate": 2.5738002738895777e-05, + "loss": 0.5679, + "step": 340 + }, + { + "epoch": 0.868470736312146, + "grad_norm": 1.2817718982696533, + "learning_rate": 2.559189947369272e-05, + "loss": 0.5422, + "step": 345 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 1.3693032264709473, + "learning_rate": 2.5443763942491176e-05, + "loss": 0.532, + "step": 350 + }, + { + "epoch": 0.893643801132788, + "grad_norm": 1.3116559982299805, + "learning_rate": 2.5293624568031008e-05, + "loss": 0.5281, + "step": 355 + }, + { + "epoch": 0.9062303335431089, + "grad_norm": 1.2440980672836304, + "learning_rate": 2.514151015752912e-05, + "loss": 0.581, + "step": 360 + }, + { + "epoch": 0.9188168659534298, + "grad_norm": 1.426661729812622, + "learning_rate": 2.4987449897152285e-05, + "loss": 0.5408, + "step": 365 + }, + { + "epoch": 0.9314033983637507, + "grad_norm": 1.3829270601272583, + "learning_rate": 2.4831473346417153e-05, + "loss": 0.5032, + "step": 370 + }, + { + "epoch": 0.9439899307740718, + "grad_norm": 1.3820431232452393, + "learning_rate": 2.467361043251869e-05, + "loss": 0.5295, + "step": 375 + }, + { + "epoch": 0.9565764631843927, + "grad_norm": 1.3880980014801025, + "learning_rate": 2.4513891444588046e-05, + "loss": 0.5863, + "step": 380 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 1.4112951755523682, + "learning_rate": 2.4352347027881003e-05, + "loss": 0.572, + "step": 385 + }, + { + "epoch": 0.9817495280050346, + "grad_norm": 1.6921632289886475, + "learning_rate": 2.4189008177898044e-05, + "loss": 0.4947, + "step": 390 + }, + { + "epoch": 0.9943360604153556, + "grad_norm": 1.265581488609314, + "learning_rate": 2.402390623443727e-05, + "loss": 0.4991, + "step": 395 + }, + { + "epoch": 1.0050346129641283, + "grad_norm": 1.642728328704834, + "learning_rate": 2.3857072875581247e-05, + "loss": 0.4547, + "step": 400 + }, + { + "epoch": 1.0176211453744493, + "grad_norm": 1.3038901090621948, + "learning_rate": 2.368854011161892e-05, + "loss": 0.4372, + "step": 405 + }, + { + "epoch": 1.0302076777847704, + "grad_norm": 1.6279821395874023, + "learning_rate": 2.3518340278903796e-05, + "loss": 0.4228, + "step": 410 + }, + { + "epoch": 1.0427942101950913, + "grad_norm": 1.3009235858917236, + "learning_rate": 2.3346506033649618e-05, + "loss": 0.4388, + "step": 415 + }, + { + "epoch": 1.0553807426054123, + "grad_norm": 1.2368204593658447, + "learning_rate": 2.317307034566456e-05, + "loss": 0.3947, + "step": 420 + }, + { + "epoch": 1.0679672750157332, + "grad_norm": 1.453291416168213, + "learning_rate": 2.2998066492025372e-05, + "loss": 0.4082, + "step": 425 + }, + { + "epoch": 1.0805538074260541, + "grad_norm": 1.3538260459899902, + "learning_rate": 2.282152805069247e-05, + "loss": 0.4003, + "step": 430 + }, + { + "epoch": 1.093140339836375, + "grad_norm": 1.1329612731933594, + "learning_rate": 2.264348889406738e-05, + "loss": 0.4342, + "step": 435 + }, + { + "epoch": 1.105726872246696, + "grad_norm": 1.3863486051559448, + "learning_rate": 2.2463983182493595e-05, + "loss": 0.3855, + "step": 440 + }, + { + "epoch": 1.118313404657017, + "grad_norm": 1.2111226320266724, + "learning_rate": 2.228304535770228e-05, + "loss": 0.4548, + "step": 445 + }, + { + "epoch": 1.1308999370673378, + "grad_norm": 1.2980037927627563, + "learning_rate": 2.210071013620393e-05, + "loss": 0.4035, + "step": 450 + }, + { + "epoch": 1.143486469477659, + "grad_norm": 1.3011761903762817, + "learning_rate": 2.1917012502627298e-05, + "loss": 0.3989, + "step": 455 + }, + { + "epoch": 1.15607300188798, + "grad_norm": 1.3596248626708984, + "learning_rate": 2.1731987703006933e-05, + "loss": 0.4048, + "step": 460 + }, + { + "epoch": 1.1686595342983008, + "grad_norm": 1.3399940729141235, + "learning_rate": 2.1545671238020507e-05, + "loss": 0.3413, + "step": 465 + }, + { + "epoch": 1.1812460667086218, + "grad_norm": 1.3742436170578003, + "learning_rate": 2.13580988561773e-05, + "loss": 0.3758, + "step": 470 + }, + { + "epoch": 1.1938325991189427, + "grad_norm": 1.2217479944229126, + "learning_rate": 2.1169306546959176e-05, + "loss": 0.3599, + "step": 475 + }, + { + "epoch": 1.2064191315292636, + "grad_norm": 1.3453923463821411, + "learning_rate": 2.097933053391524e-05, + "loss": 0.3987, + "step": 480 + }, + { + "epoch": 1.2190056639395848, + "grad_norm": 1.4340107440948486, + "learning_rate": 2.0788207267711672e-05, + "loss": 0.4009, + "step": 485 + }, + { + "epoch": 1.2315921963499057, + "grad_norm": 1.2932227849960327, + "learning_rate": 2.059597341913791e-05, + "loss": 0.3782, + "step": 490 + }, + { + "epoch": 1.2441787287602266, + "grad_norm": 1.3744287490844727, + "learning_rate": 2.0402665872070656e-05, + "loss": 0.3074, + "step": 495 + }, + { + "epoch": 1.2567652611705475, + "grad_norm": 1.4438745975494385, + "learning_rate": 2.0208321716396965e-05, + "loss": 0.2897, + "step": 500 + }, + { + "epoch": 1.2693517935808685, + "grad_norm": 1.2832309007644653, + "learning_rate": 2.0012978240897813e-05, + "loss": 0.344, + "step": 505 + }, + { + "epoch": 1.2819383259911894, + "grad_norm": 1.3038865327835083, + "learning_rate": 1.98166729260935e-05, + "loss": 0.3395, + "step": 510 + }, + { + "epoch": 1.2945248584015103, + "grad_norm": 1.2700951099395752, + "learning_rate": 1.9619443437052282e-05, + "loss": 0.3383, + "step": 515 + }, + { + "epoch": 1.3071113908118313, + "grad_norm": 1.4435508251190186, + "learning_rate": 1.9421327616163564e-05, + "loss": 0.3322, + "step": 520 + }, + { + "epoch": 1.3196979232221522, + "grad_norm": 1.4882357120513916, + "learning_rate": 1.922236347587711e-05, + "loss": 0.3477, + "step": 525 + }, + { + "epoch": 1.3322844556324733, + "grad_norm": 1.537044644355774, + "learning_rate": 1.902258919140956e-05, + "loss": 0.2768, + "step": 530 + }, + { + "epoch": 1.3448709880427943, + "grad_norm": 1.480634093284607, + "learning_rate": 1.882204309341982e-05, + "loss": 0.3263, + "step": 535 + }, + { + "epoch": 1.3574575204531152, + "grad_norm": 1.2875910997390747, + "learning_rate": 1.86207636606545e-05, + "loss": 0.2943, + "step": 540 + }, + { + "epoch": 1.3700440528634361, + "grad_norm": 1.2918517589569092, + "learning_rate": 1.8418789512565048e-05, + "loss": 0.3108, + "step": 545 + }, + { + "epoch": 1.382630585273757, + "grad_norm": 1.3748197555541992, + "learning_rate": 1.8216159401897812e-05, + "loss": 0.3085, + "step": 550 + }, + { + "epoch": 1.395217117684078, + "grad_norm": 1.4507850408554077, + "learning_rate": 1.801291220725859e-05, + "loss": 0.3178, + "step": 555 + }, + { + "epoch": 1.4078036500943991, + "grad_norm": 1.3796157836914062, + "learning_rate": 1.7809086925652953e-05, + "loss": 0.3201, + "step": 560 + }, + { + "epoch": 1.42039018250472, + "grad_norm": 1.6121405363082886, + "learning_rate": 1.760472266500396e-05, + "loss": 0.287, + "step": 565 + }, + { + "epoch": 1.432976714915041, + "grad_norm": 1.4355881214141846, + "learning_rate": 1.7399858636648443e-05, + "loss": 0.3303, + "step": 570 + }, + { + "epoch": 1.445563247325362, + "grad_norm": 1.236096739768982, + "learning_rate": 1.719453414781363e-05, + "loss": 0.2998, + "step": 575 + }, + { + "epoch": 1.4581497797356828, + "grad_norm": 1.1396565437316895, + "learning_rate": 1.6988788594075193e-05, + "loss": 0.302, + "step": 580 + }, + { + "epoch": 1.4707363121460038, + "grad_norm": 1.4614214897155762, + "learning_rate": 1.678266145179846e-05, + "loss": 0.252, + "step": 585 + }, + { + "epoch": 1.4833228445563247, + "grad_norm": 1.2323857545852661, + "learning_rate": 1.6576192270564096e-05, + "loss": 0.2727, + "step": 590 + }, + { + "epoch": 1.4959093769666456, + "grad_norm": 1.2046157121658325, + "learning_rate": 1.6369420665579727e-05, + "loss": 0.2547, + "step": 595 + }, + { + "epoch": 1.5084959093769665, + "grad_norm": 1.4330130815505981, + "learning_rate": 1.6162386310078966e-05, + "loss": 0.3065, + "step": 600 + }, + { + "epoch": 1.5210824417872875, + "grad_norm": 1.3772592544555664, + "learning_rate": 1.595512892770933e-05, + "loss": 0.2974, + "step": 605 + }, + { + "epoch": 1.5336689741976086, + "grad_norm": 1.2593746185302734, + "learning_rate": 1.5747688284910457e-05, + "loss": 0.2274, + "step": 610 + }, + { + "epoch": 1.5462555066079295, + "grad_norm": 1.4423153400421143, + "learning_rate": 1.554010418328415e-05, + "loss": 0.2548, + "step": 615 + }, + { + "epoch": 1.5588420390182505, + "grad_norm": 1.2069021463394165, + "learning_rate": 1.5332416451957603e-05, + "loss": 0.2669, + "step": 620 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.3519009351730347, + "learning_rate": 1.5124664939941458e-05, + "loss": 0.2515, + "step": 625 + }, + { + "epoch": 1.5840151038388925, + "grad_norm": 1.2931946516036987, + "learning_rate": 1.4916889508483922e-05, + "loss": 0.2843, + "step": 630 + }, + { + "epoch": 1.5966016362492135, + "grad_norm": 1.4394770860671997, + "learning_rate": 1.4709130023422636e-05, + "loss": 0.2498, + "step": 635 + }, + { + "epoch": 1.6091881686595344, + "grad_norm": 1.3603404760360718, + "learning_rate": 1.4501426347535602e-05, + "loss": 0.2611, + "step": 640 + }, + { + "epoch": 1.6217747010698553, + "grad_norm": 1.4230326414108276, + "learning_rate": 1.4293818332892725e-05, + "loss": 0.2734, + "step": 645 + }, + { + "epoch": 1.6343612334801763, + "grad_norm": 1.7960079908370972, + "learning_rate": 1.4086345813209398e-05, + "loss": 0.2438, + "step": 650 + }, + { + "epoch": 1.6469477658904972, + "grad_norm": 1.4120336771011353, + "learning_rate": 1.3879048596203637e-05, + "loss": 0.2486, + "step": 655 + }, + { + "epoch": 1.6595342983008181, + "grad_norm": 1.5952705144882202, + "learning_rate": 1.3671966455958143e-05, + "loss": 0.2377, + "step": 660 + }, + { + "epoch": 1.672120830711139, + "grad_norm": 1.2097992897033691, + "learning_rate": 1.3465139125288884e-05, + "loss": 0.2182, + "step": 665 + }, + { + "epoch": 1.68470736312146, + "grad_norm": 1.328300952911377, + "learning_rate": 1.3258606288121545e-05, + "loss": 0.2489, + "step": 670 + }, + { + "epoch": 1.697293895531781, + "grad_norm": 1.3122562170028687, + "learning_rate": 1.3052407571877415e-05, + "loss": 0.254, + "step": 675 + }, + { + "epoch": 1.7098804279421018, + "grad_norm": 1.1172245740890503, + "learning_rate": 1.2846582539870034e-05, + "loss": 0.2142, + "step": 680 + }, + { + "epoch": 1.7224669603524227, + "grad_norm": 1.2993172407150269, + "learning_rate": 1.2641170683714222e-05, + "loss": 0.2362, + "step": 685 + }, + { + "epoch": 1.735053492762744, + "grad_norm": 1.2438275814056396, + "learning_rate": 1.2436211415748809e-05, + "loss": 0.2194, + "step": 690 + }, + { + "epoch": 1.7476400251730648, + "grad_norm": 1.1280429363250732, + "learning_rate": 1.223174406147461e-05, + "loss": 0.2557, + "step": 695 + }, + { + "epoch": 1.7602265575833858, + "grad_norm": 1.3084126710891724, + "learning_rate": 1.202780785200904e-05, + "loss": 0.234, + "step": 700 + }, + { + "epoch": 1.7728130899937067, + "grad_norm": 1.342391014099121, + "learning_rate": 1.1824441916558843e-05, + "loss": 0.2128, + "step": 705 + }, + { + "epoch": 1.7853996224040278, + "grad_norm": 1.3759722709655762, + "learning_rate": 1.1621685274912381e-05, + "loss": 0.2079, + "step": 710 + }, + { + "epoch": 1.7979861548143488, + "grad_norm": 1.5465227365493774, + "learning_rate": 1.1419576829952935e-05, + "loss": 0.2159, + "step": 715 + }, + { + "epoch": 1.8105726872246697, + "grad_norm": 1.2304023504257202, + "learning_rate": 1.1218155360194368e-05, + "loss": 0.2084, + "step": 720 + }, + { + "epoch": 1.8231592196349906, + "grad_norm": 1.3247654438018799, + "learning_rate": 1.1017459512340742e-05, + "loss": 0.1753, + "step": 725 + }, + { + "epoch": 1.8357457520453115, + "grad_norm": 1.2426429986953735, + "learning_rate": 1.0817527793871143e-05, + "loss": 0.1901, + "step": 730 + }, + { + "epoch": 1.8483322844556325, + "grad_norm": 1.3994040489196777, + "learning_rate": 1.0618398565651315e-05, + "loss": 0.1731, + "step": 735 + }, + { + "epoch": 1.8609188168659534, + "grad_norm": 1.4200652837753296, + "learning_rate": 1.0420110034573304e-05, + "loss": 0.197, + "step": 740 + }, + { + "epoch": 1.8735053492762743, + "grad_norm": 1.1847480535507202, + "learning_rate": 1.0222700246224735e-05, + "loss": 0.1632, + "step": 745 + }, + { + "epoch": 1.8860918816865953, + "grad_norm": 1.3459926843643188, + "learning_rate": 1.0026207077589017e-05, + "loss": 0.1941, + "step": 750 + }, + { + "epoch": 1.8986784140969162, + "grad_norm": 1.4676244258880615, + "learning_rate": 9.83066822977789e-06, + "loss": 0.1837, + "step": 755 + }, + { + "epoch": 1.911264946507237, + "grad_norm": 1.0442742109298706, + "learning_rate": 9.636121220797708e-06, + "loss": 0.1903, + "step": 760 + }, + { + "epoch": 1.9238514789175583, + "grad_norm": 1.385401964187622, + "learning_rate": 9.44260337835088e-06, + "loss": 0.1826, + "step": 765 + }, + { + "epoch": 1.9364380113278792, + "grad_norm": 1.176165223121643, + "learning_rate": 9.250151832673785e-06, + "loss": 0.1857, + "step": 770 + }, + { + "epoch": 1.9490245437382, + "grad_norm": 1.1363468170166016, + "learning_rate": 9.058803509412647e-06, + "loss": 0.1743, + "step": 775 + }, + { + "epoch": 1.961611076148521, + "grad_norm": 1.1079814434051514, + "learning_rate": 8.868595122538569e-06, + "loss": 0.192, + "step": 780 + }, + { + "epoch": 1.9741976085588422, + "grad_norm": 1.1941912174224854, + "learning_rate": 8.679563167303242e-06, + "loss": 0.1903, + "step": 785 + }, + { + "epoch": 1.9867841409691631, + "grad_norm": 1.3019009828567505, + "learning_rate": 8.491743913236629e-06, + "loss": 0.1836, + "step": 790 + }, + { + "epoch": 1.999370673379484, + "grad_norm": 1.1499392986297607, + "learning_rate": 8.305173397187912e-06, + "loss": 0.1604, + "step": 795 + }, + { + "epoch": 2.0100692259282567, + "grad_norm": 1.0508766174316406, + "learning_rate": 8.119887416411119e-06, + "loss": 0.1403, + "step": 800 + }, + { + "epoch": 2.0226557583385776, + "grad_norm": 1.322184443473816, + "learning_rate": 7.935921521696703e-06, + "loss": 0.1706, + "step": 805 + }, + { + "epoch": 2.0352422907488985, + "grad_norm": 1.0707652568817139, + "learning_rate": 7.753311010550423e-06, + "loss": 0.1421, + "step": 810 + }, + { + "epoch": 2.0478288231592194, + "grad_norm": 1.0650321245193481, + "learning_rate": 7.572090920420831e-06, + "loss": 0.1229, + "step": 815 + }, + { + "epoch": 2.060415355569541, + "grad_norm": 1.2199265956878662, + "learning_rate": 7.392296021976615e-06, + "loss": 0.1552, + "step": 820 + }, + { + "epoch": 2.0730018879798617, + "grad_norm": 1.024841547012329, + "learning_rate": 7.21396081243517e-06, + "loss": 0.1188, + "step": 825 + }, + { + "epoch": 2.0855884203901827, + "grad_norm": 1.1309313774108887, + "learning_rate": 7.037119508943623e-06, + "loss": 0.1278, + "step": 830 + }, + { + "epoch": 2.0981749528005036, + "grad_norm": 1.0303701162338257, + "learning_rate": 6.86180604201361e-06, + "loss": 0.1577, + "step": 835 + }, + { + "epoch": 2.1107614852108245, + "grad_norm": 1.234488844871521, + "learning_rate": 6.688054049011e-06, + "loss": 0.1286, + "step": 840 + }, + { + "epoch": 2.1233480176211454, + "grad_norm": 1.1033024787902832, + "learning_rate": 6.515896867701924e-06, + "loss": 0.1216, + "step": 845 + }, + { + "epoch": 2.1359345500314664, + "grad_norm": 1.1684556007385254, + "learning_rate": 6.345367529856254e-06, + "loss": 0.1436, + "step": 850 + }, + { + "epoch": 2.1485210824417873, + "grad_norm": 1.1145367622375488, + "learning_rate": 6.176498754909823e-06, + "loss": 0.1266, + "step": 855 + }, + { + "epoch": 2.1611076148521082, + "grad_norm": 1.0481644868850708, + "learning_rate": 6.009322943686515e-06, + "loss": 0.1199, + "step": 860 + }, + { + "epoch": 2.173694147262429, + "grad_norm": 0.9159044027328491, + "learning_rate": 5.843872172181554e-06, + "loss": 0.0981, + "step": 865 + }, + { + "epoch": 2.18628067967275, + "grad_norm": 1.0690033435821533, + "learning_rate": 5.680178185407073e-06, + "loss": 0.1131, + "step": 870 + }, + { + "epoch": 2.198867212083071, + "grad_norm": 1.2365037202835083, + "learning_rate": 5.518272391301223e-06, + "loss": 0.1337, + "step": 875 + }, + { + "epoch": 2.211453744493392, + "grad_norm": 1.05690336227417, + "learning_rate": 5.3581858547019095e-06, + "loss": 0.1348, + "step": 880 + }, + { + "epoch": 2.224040276903713, + "grad_norm": 1.0568009614944458, + "learning_rate": 5.199949291386409e-06, + "loss": 0.1281, + "step": 885 + }, + { + "epoch": 2.236626809314034, + "grad_norm": 1.023875117301941, + "learning_rate": 5.043593062177925e-06, + "loss": 0.1127, + "step": 890 + }, + { + "epoch": 2.2492133417243547, + "grad_norm": 0.9061659574508667, + "learning_rate": 4.889147167120268e-06, + "loss": 0.0972, + "step": 895 + }, + { + "epoch": 2.2617998741346756, + "grad_norm": 1.2229218482971191, + "learning_rate": 4.736641239721761e-06, + "loss": 0.1229, + "step": 900 + }, + { + "epoch": 2.274386406544997, + "grad_norm": 0.890901505947113, + "learning_rate": 4.586104541269444e-06, + "loss": 0.1215, + "step": 905 + }, + { + "epoch": 2.286972938955318, + "grad_norm": 1.0121715068817139, + "learning_rate": 4.4375659552147245e-06, + "loss": 0.1286, + "step": 910 + }, + { + "epoch": 2.299559471365639, + "grad_norm": 1.0148580074310303, + "learning_rate": 4.2910539816315166e-06, + "loss": 0.1322, + "step": 915 + }, + { + "epoch": 2.31214600377596, + "grad_norm": 0.9136136174201965, + "learning_rate": 4.146596731747938e-06, + "loss": 0.1183, + "step": 920 + }, + { + "epoch": 2.3247325361862807, + "grad_norm": 1.1469573974609375, + "learning_rate": 4.004221922552608e-06, + "loss": 0.0982, + "step": 925 + }, + { + "epoch": 2.3373190685966017, + "grad_norm": 0.9710603356361389, + "learning_rate": 3.863956871476593e-06, + "loss": 0.1033, + "step": 930 + }, + { + "epoch": 2.3499056010069226, + "grad_norm": 1.0158684253692627, + "learning_rate": 3.7258284911520273e-06, + "loss": 0.1251, + "step": 935 + }, + { + "epoch": 2.3624921334172435, + "grad_norm": 0.7837651968002319, + "learning_rate": 3.5898632842483746e-06, + "loss": 0.1107, + "step": 940 + }, + { + "epoch": 2.3750786658275644, + "grad_norm": 0.88968425989151, + "learning_rate": 3.4560873383873713e-06, + "loss": 0.1117, + "step": 945 + }, + { + "epoch": 2.3876651982378854, + "grad_norm": 0.8716320991516113, + "learning_rate": 3.3245263211376e-06, + "loss": 0.1058, + "step": 950 + }, + { + "epoch": 2.4002517306482063, + "grad_norm": 1.0353062152862549, + "learning_rate": 3.1952054750896677e-06, + "loss": 0.1027, + "step": 955 + }, + { + "epoch": 2.412838263058527, + "grad_norm": 0.920871913433075, + "learning_rate": 3.0681496130128903e-06, + "loss": 0.108, + "step": 960 + }, + { + "epoch": 2.425424795468848, + "grad_norm": 0.9126585721969604, + "learning_rate": 2.9433831130944926e-06, + "loss": 0.122, + "step": 965 + }, + { + "epoch": 2.4380113278791695, + "grad_norm": 0.9858216047286987, + "learning_rate": 2.8209299142621527e-06, + "loss": 0.1119, + "step": 970 + }, + { + "epoch": 2.4505978602894904, + "grad_norm": 0.907102108001709, + "learning_rate": 2.7008135115908657e-06, + "loss": 0.1088, + "step": 975 + }, + { + "epoch": 2.4631843926998114, + "grad_norm": 0.9393854141235352, + "learning_rate": 2.583056951794922e-06, + "loss": 0.103, + "step": 980 + }, + { + "epoch": 2.4757709251101323, + "grad_norm": 0.8112927675247192, + "learning_rate": 2.467682828805956e-06, + "loss": 0.0965, + "step": 985 + }, + { + "epoch": 2.4883574575204532, + "grad_norm": 0.9265113472938538, + "learning_rate": 2.3547132794378395e-06, + "loss": 0.1182, + "step": 990 + }, + { + "epoch": 2.500943989930774, + "grad_norm": 0.7953263521194458, + "learning_rate": 2.244169979139315e-06, + "loss": 0.1036, + "step": 995 + }, + { + "epoch": 2.513530522341095, + "grad_norm": 0.9314330816268921, + "learning_rate": 2.1360741378351074e-06, + "loss": 0.1053, + "step": 1000 + }, + { + "epoch": 2.526117054751416, + "grad_norm": 0.8704200387001038, + "learning_rate": 2.030446495856392e-06, + "loss": 0.0885, + "step": 1005 + }, + { + "epoch": 2.538703587161737, + "grad_norm": 0.8757153153419495, + "learning_rate": 1.927307319961345e-06, + "loss": 0.1019, + "step": 1010 + }, + { + "epoch": 2.551290119572058, + "grad_norm": 0.9093301892280579, + "learning_rate": 1.8266763994465702e-06, + "loss": 0.1084, + "step": 1015 + }, + { + "epoch": 2.563876651982379, + "grad_norm": 1.0127232074737549, + "learning_rate": 1.7285730423501328e-06, + "loss": 0.1088, + "step": 1020 + }, + { + "epoch": 2.5764631843926997, + "grad_norm": 0.9451376795768738, + "learning_rate": 1.6330160717469133e-06, + "loss": 0.0998, + "step": 1025 + }, + { + "epoch": 2.5890497168030207, + "grad_norm": 0.869429886341095, + "learning_rate": 1.5400238221370416e-06, + "loss": 0.0943, + "step": 1030 + }, + { + "epoch": 2.6016362492133416, + "grad_norm": 1.0575133562088013, + "learning_rate": 1.4496141359280557e-06, + "loss": 0.081, + "step": 1035 + }, + { + "epoch": 2.6142227816236625, + "grad_norm": 0.8123021721839905, + "learning_rate": 1.3618043600114804e-06, + "loss": 0.0875, + "step": 1040 + }, + { + "epoch": 2.6268093140339834, + "grad_norm": 0.7598457932472229, + "learning_rate": 1.2766113424344816e-06, + "loss": 0.0923, + "step": 1045 + }, + { + "epoch": 2.6393958464443044, + "grad_norm": 1.0861306190490723, + "learning_rate": 1.1940514291672382e-06, + "loss": 0.0927, + "step": 1050 + }, + { + "epoch": 2.6519823788546253, + "grad_norm": 0.7725820541381836, + "learning_rate": 1.114140460966645e-06, + "loss": 0.0838, + "step": 1055 + }, + { + "epoch": 2.6645689112649467, + "grad_norm": 0.6658778786659241, + "learning_rate": 1.036893770336938e-06, + "loss": 0.0853, + "step": 1060 + }, + { + "epoch": 2.6771554436752676, + "grad_norm": 1.0075525045394897, + "learning_rate": 9.623261785878584e-07, + "loss": 0.096, + "step": 1065 + }, + { + "epoch": 2.6897419760855885, + "grad_norm": 0.7621735334396362, + "learning_rate": 8.904519929908905e-07, + "loss": 0.1033, + "step": 1070 + }, + { + "epoch": 2.7023285084959094, + "grad_norm": 0.707410454750061, + "learning_rate": 8.212850040341274e-07, + "loss": 0.0955, + "step": 1075 + }, + { + "epoch": 2.7149150409062304, + "grad_norm": 0.8618693947792053, + "learning_rate": 7.548384827762911e-07, + "loss": 0.0814, + "step": 1080 + }, + { + "epoch": 2.7275015733165513, + "grad_norm": 0.8130265474319458, + "learning_rate": 6.911251783004091e-07, + "loss": 0.099, + "step": 1085 + }, + { + "epoch": 2.7400881057268722, + "grad_norm": 0.6948117017745972, + "learning_rate": 6.301573152676665e-07, + "loss": 0.0893, + "step": 1090 + }, + { + "epoch": 2.752674638137193, + "grad_norm": 0.8569169640541077, + "learning_rate": 5.71946591571852e-07, + "loss": 0.0985, + "step": 1095 + }, + { + "epoch": 2.765261170547514, + "grad_norm": 0.8096495866775513, + "learning_rate": 5.165041760948796e-07, + "loss": 0.1015, + "step": 1100 + }, + { + "epoch": 2.777847702957835, + "grad_norm": 0.8831220269203186, + "learning_rate": 4.6384070656383227e-07, + "loss": 0.0859, + "step": 1105 + }, + { + "epoch": 2.790434235368156, + "grad_norm": 0.7621248960494995, + "learning_rate": 4.1396628750988485e-07, + "loss": 0.0959, + "step": 1110 + }, + { + "epoch": 2.8030207677784773, + "grad_norm": 0.8468384742736816, + "learning_rate": 3.6689048832957093e-07, + "loss": 0.1096, + "step": 1115 + }, + { + "epoch": 2.8156073001887982, + "grad_norm": 0.7720547914505005, + "learning_rate": 3.2262234144868117e-07, + "loss": 0.1001, + "step": 1120 + }, + { + "epoch": 2.828193832599119, + "grad_norm": 0.7954249382019043, + "learning_rate": 2.811703405892296e-07, + "loss": 0.0966, + "step": 1125 + }, + { + "epoch": 2.84078036500944, + "grad_norm": 0.6872217059135437, + "learning_rate": 2.425424391397607e-07, + "loss": 0.0844, + "step": 1130 + }, + { + "epoch": 2.853366897419761, + "grad_norm": 0.8678398132324219, + "learning_rate": 2.0674604862932656e-07, + "loss": 0.1047, + "step": 1135 + }, + { + "epoch": 2.865953429830082, + "grad_norm": 0.7691945433616638, + "learning_rate": 1.7378803730545122e-07, + "loss": 0.0977, + "step": 1140 + }, + { + "epoch": 2.878539962240403, + "grad_norm": 0.7832962870597839, + "learning_rate": 1.436747288163115e-07, + "loss": 0.1084, + "step": 1145 + }, + { + "epoch": 2.891126494650724, + "grad_norm": 0.735747218132019, + "learning_rate": 1.1641190099741905e-07, + "loss": 0.0917, + "step": 1150 + }, + { + "epoch": 2.9037130270610447, + "grad_norm": 0.8435922265052795, + "learning_rate": 9.200478476303565e-08, + "loss": 0.0825, + "step": 1155 + }, + { + "epoch": 2.9162995594713657, + "grad_norm": 0.8477237224578857, + "learning_rate": 7.045806310251257e-08, + "loss": 0.1023, + "step": 1160 + }, + { + "epoch": 2.9288860918816866, + "grad_norm": 0.8861922025680542, + "learning_rate": 5.177587018176777e-08, + "loss": 0.1037, + "step": 1165 + }, + { + "epoch": 2.9414726242920075, + "grad_norm": 0.7870932221412659, + "learning_rate": 3.596179055006365e-08, + "loss": 0.0944, + "step": 1170 + }, + { + "epoch": 2.9540591567023284, + "grad_norm": 0.8462067246437073, + "learning_rate": 2.301885845224061e-08, + "loss": 0.0926, + "step": 1175 + }, + { + "epoch": 2.9666456891126494, + "grad_norm": 0.8935102224349976, + "learning_rate": 1.294955724653768e-08, + "loss": 0.1084, + "step": 1180 + }, + { + "epoch": 2.9792322215229703, + "grad_norm": 0.8632627725601196, + "learning_rate": 5.7558189281120555e-09, + "loss": 0.0923, + "step": 1185 + }, + { + "epoch": 2.991818753933291, + "grad_norm": 0.7596596479415894, + "learning_rate": 1.4390237583405963e-09, + "loss": 0.0928, + "step": 1190 + }, + { + "epoch": 3.0, + "step": 1194, + "total_flos": 1.5090307282417746e+18, + "train_loss": 0.45357518162659466, + "train_runtime": 1191.6902, + "train_samples_per_second": 31.999, + "train_steps_per_second": 1.002 + } + ], + "logging_steps": 5, + "max_steps": 1194, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5090307282417746e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..74d2945261f532f549ecc83980196762bda6037f --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/3_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d52dc283ebfc6f48eaa1ff1e793d1af87b69fcc20d00d418d31fb8f97b94cf0d +size 8273 diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..83c7fe5c7281619f8fb3e142f925c865869922f9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 4_128_e3_3e-5 + results: [] +--- + + + +# 4_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9aa4d60dc71cded341385bbf7b4c12c7b38a570 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "gate_proj", + "k_proj", + "q_proj", + "up_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69b5b625b7aaa41af64985cfb0c4f80c9c5889ea --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93127603b9259fe4ff1e38f2001d6c1ee7e8cc54238a9786ace8c04829c9f19b +size 671150064 diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d3f71b13cd00e99fa12aa2e47ab9a3bdb934959b --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4414949418081976e+18, + "train_loss": 0.43326316522896713, + "train_runtime": 1137.0408, + "train_samples": 12014, + "train_samples_per_second": 31.698, + "train_steps_per_second": 0.992 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d3f71b13cd00e99fa12aa2e47ab9a3bdb934959b --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4414949418081976e+18, + "train_loss": 0.43326316522896713, + "train_runtime": 1137.0408, + "train_samples": 12014, + "train_samples_per_second": 31.698, + "train_steps_per_second": 0.992 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..aeb93b063e0b53efcb332b67a3279a086f6b985d --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1618 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1128, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013315579227696404, + "grad_norm": 0.6600546836853027, + "learning_rate": 2.1052631578947366e-06, + "loss": 1.5927, + "step": 5 + }, + { + "epoch": 0.02663115845539281, + "grad_norm": 0.5942349433898926, + "learning_rate": 4.736842105263158e-06, + "loss": 1.599, + "step": 10 + }, + { + "epoch": 0.03994673768308921, + "grad_norm": 0.649502694606781, + "learning_rate": 7.3684210526315784e-06, + "loss": 1.587, + "step": 15 + }, + { + "epoch": 0.05326231691078562, + "grad_norm": 0.5164840817451477, + "learning_rate": 9.999999999999999e-06, + "loss": 1.5834, + "step": 20 + }, + { + "epoch": 0.06657789613848203, + "grad_norm": 0.519739031791687, + "learning_rate": 1.263157894736842e-05, + "loss": 1.5769, + "step": 25 + }, + { + "epoch": 0.07989347536617843, + "grad_norm": 0.4934748411178589, + "learning_rate": 1.5263157894736842e-05, + "loss": 1.5236, + "step": 30 + }, + { + "epoch": 0.09320905459387484, + "grad_norm": 0.5344314575195312, + "learning_rate": 1.7894736842105264e-05, + "loss": 1.5076, + "step": 35 + }, + { + "epoch": 0.10652463382157124, + "grad_norm": 0.48246416449546814, + "learning_rate": 2.0526315789473685e-05, + "loss": 1.4387, + "step": 40 + }, + { + "epoch": 0.11984021304926765, + "grad_norm": 0.5060396790504456, + "learning_rate": 2.3157894736842103e-05, + "loss": 1.4836, + "step": 45 + }, + { + "epoch": 0.13315579227696406, + "grad_norm": 0.5308860540390015, + "learning_rate": 2.578947368421053e-05, + "loss": 1.4441, + "step": 50 + }, + { + "epoch": 0.14647137150466044, + "grad_norm": 0.5403273105621338, + "learning_rate": 2.8421052631578946e-05, + "loss": 1.3603, + "step": 55 + }, + { + "epoch": 0.15978695073235685, + "grad_norm": 0.5766253471374512, + "learning_rate": 2.9999741868614275e-05, + "loss": 1.3993, + "step": 60 + }, + { + "epoch": 0.17310252996005326, + "grad_norm": 0.5355752110481262, + "learning_rate": 2.999683799255387e-05, + "loss": 1.3608, + "step": 65 + }, + { + "epoch": 0.18641810918774968, + "grad_norm": 0.5414921045303345, + "learning_rate": 2.9990708202925038e-05, + "loss": 1.3738, + "step": 70 + }, + { + "epoch": 0.19973368841544606, + "grad_norm": 0.5951070189476013, + "learning_rate": 2.9981353818283835e-05, + "loss": 1.316, + "step": 75 + }, + { + "epoch": 0.21304926764314247, + "grad_norm": 0.6604633927345276, + "learning_rate": 2.996877685081685e-05, + "loss": 1.2847, + "step": 80 + }, + { + "epoch": 0.22636484687083888, + "grad_norm": 0.6302446126937866, + "learning_rate": 2.995298000590839e-05, + "loss": 1.3303, + "step": 85 + }, + { + "epoch": 0.2396804260985353, + "grad_norm": 0.6784277558326721, + "learning_rate": 2.99339666815585e-05, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.2529960053262317, + "grad_norm": 0.7011968493461609, + "learning_rate": 2.9911740967652065e-05, + "loss": 1.2185, + "step": 95 + }, + { + "epoch": 0.2663115845539281, + "grad_norm": 0.7723592519760132, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.1832, + "step": 100 + }, + { + "epoch": 0.2796271637816245, + "grad_norm": 0.7174793481826782, + "learning_rate": 2.9857672184706038e-05, + "loss": 1.1094, + "step": 105 + }, + { + "epoch": 0.2929427430093209, + "grad_norm": 0.7712554335594177, + "learning_rate": 2.9825840746199534e-05, + "loss": 1.1092, + "step": 110 + }, + { + "epoch": 0.3062583222370173, + "grad_norm": 0.8383224010467529, + "learning_rate": 2.9790820176700872e-05, + "loss": 1.137, + "step": 115 + }, + { + "epoch": 0.3195739014647137, + "grad_norm": 0.8743329048156738, + "learning_rate": 2.975261800935339e-05, + "loss": 1.0725, + "step": 120 + }, + { + "epoch": 0.33288948069241014, + "grad_norm": 0.8880398869514465, + "learning_rate": 2.971124246168202e-05, + "loss": 1.0882, + "step": 125 + }, + { + "epoch": 0.34620505992010653, + "grad_norm": 0.9342496991157532, + "learning_rate": 2.9666702433825614e-05, + "loss": 1.1032, + "step": 130 + }, + { + "epoch": 0.3595206391478029, + "grad_norm": 0.8069312572479248, + "learning_rate": 2.9619007506622506e-05, + "loss": 1.0588, + "step": 135 + }, + { + "epoch": 0.37283621837549935, + "grad_norm": 0.8948056697845459, + "learning_rate": 2.956816793954958e-05, + "loss": 1.0412, + "step": 140 + }, + { + "epoch": 0.38615179760319573, + "grad_norm": 0.9484046101570129, + "learning_rate": 2.951419466851542e-05, + "loss": 1.0058, + "step": 145 + }, + { + "epoch": 0.3994673768308921, + "grad_norm": 0.9705715179443359, + "learning_rate": 2.9457099303507904e-05, + "loss": 0.9668, + "step": 150 + }, + { + "epoch": 0.41278295605858856, + "grad_norm": 0.9719052314758301, + "learning_rate": 2.939689412609684e-05, + "loss": 0.9872, + "step": 155 + }, + { + "epoch": 0.42609853528628494, + "grad_norm": 1.1409313678741455, + "learning_rate": 2.9333592086792113e-05, + "loss": 0.9508, + "step": 160 + }, + { + "epoch": 0.4394141145139814, + "grad_norm": 1.0305752754211426, + "learning_rate": 2.9267206802257952e-05, + "loss": 0.9346, + "step": 165 + }, + { + "epoch": 0.45272969374167776, + "grad_norm": 1.0236687660217285, + "learning_rate": 2.919775255238392e-05, + "loss": 0.9113, + "step": 170 + }, + { + "epoch": 0.46604527296937415, + "grad_norm": 1.0788071155548096, + "learning_rate": 2.9125244277213176e-05, + "loss": 0.8853, + "step": 175 + }, + { + "epoch": 0.4793608521970706, + "grad_norm": 1.0246715545654297, + "learning_rate": 2.9049697573728818e-05, + "loss": 0.9377, + "step": 180 + }, + { + "epoch": 0.49267643142476697, + "grad_norm": 1.0355359315872192, + "learning_rate": 2.8971128692498872e-05, + "loss": 0.8587, + "step": 185 + }, + { + "epoch": 0.5059920106524634, + "grad_norm": 1.2902796268463135, + "learning_rate": 2.8889554534180664e-05, + "loss": 0.8014, + "step": 190 + }, + { + "epoch": 0.5193075898801598, + "grad_norm": 1.0152711868286133, + "learning_rate": 2.8804992645885415e-05, + "loss": 0.8936, + "step": 195 + }, + { + "epoch": 0.5326231691078562, + "grad_norm": 1.051537275314331, + "learning_rate": 2.8717461217403726e-05, + "loss": 0.8338, + "step": 200 + }, + { + "epoch": 0.5459387483355526, + "grad_norm": 1.107377290725708, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.7902, + "step": 205 + }, + { + "epoch": 0.559254327563249, + "grad_norm": 1.1055326461791992, + "learning_rate": 2.853356568882657e-05, + "loss": 0.8523, + "step": 210 + }, + { + "epoch": 0.5725699067909454, + "grad_norm": 1.2028801441192627, + "learning_rate": 2.843724114580848e-05, + "loss": 0.8248, + "step": 215 + }, + { + "epoch": 0.5858854860186418, + "grad_norm": 1.4174926280975342, + "learning_rate": 2.833802616824972e-05, + "loss": 0.761, + "step": 220 + }, + { + "epoch": 0.5992010652463382, + "grad_norm": 1.3308601379394531, + "learning_rate": 2.8235942097911964e-05, + "loss": 0.7464, + "step": 225 + }, + { + "epoch": 0.6125166444740346, + "grad_norm": 1.2039014101028442, + "learning_rate": 2.8131010893716676e-05, + "loss": 0.7568, + "step": 230 + }, + { + "epoch": 0.625832223701731, + "grad_norm": 1.1827210187911987, + "learning_rate": 2.8023255127021593e-05, + "loss": 0.7722, + "step": 235 + }, + { + "epoch": 0.6391478029294274, + "grad_norm": 1.4149396419525146, + "learning_rate": 2.7912697976765516e-05, + "loss": 0.7142, + "step": 240 + }, + { + "epoch": 0.6524633821571239, + "grad_norm": 1.204516887664795, + "learning_rate": 2.7799363224482334e-05, + "loss": 0.7163, + "step": 245 + }, + { + "epoch": 0.6657789613848203, + "grad_norm": 1.17918062210083, + "learning_rate": 2.7683275249185507e-05, + "loss": 0.6592, + "step": 250 + }, + { + "epoch": 0.6790945406125166, + "grad_norm": 1.2077648639678955, + "learning_rate": 2.7564459022123953e-05, + "loss": 0.6945, + "step": 255 + }, + { + "epoch": 0.6924101198402131, + "grad_norm": 1.1579927206039429, + "learning_rate": 2.744294010141061e-05, + "loss": 0.6409, + "step": 260 + }, + { + "epoch": 0.7057256990679095, + "grad_norm": 1.2698099613189697, + "learning_rate": 2.7318744626524704e-05, + "loss": 0.6286, + "step": 265 + }, + { + "epoch": 0.7190412782956058, + "grad_norm": 1.139428734779358, + "learning_rate": 2.719189931268899e-05, + "loss": 0.6426, + "step": 270 + }, + { + "epoch": 0.7323568575233023, + "grad_norm": 1.207734227180481, + "learning_rate": 2.7062431445123127e-05, + "loss": 0.6584, + "step": 275 + }, + { + "epoch": 0.7456724367509987, + "grad_norm": 1.3638228178024292, + "learning_rate": 2.6930368873174493e-05, + "loss": 0.6002, + "step": 280 + }, + { + "epoch": 0.758988015978695, + "grad_norm": 1.3187388181686401, + "learning_rate": 2.6795740004327584e-05, + "loss": 0.5852, + "step": 285 + }, + { + "epoch": 0.7723035952063915, + "grad_norm": 1.207688331604004, + "learning_rate": 2.665857379809338e-05, + "loss": 0.5817, + "step": 290 + }, + { + "epoch": 0.7856191744340879, + "grad_norm": 1.3150652647018433, + "learning_rate": 2.6518899759780017e-05, + "loss": 0.6065, + "step": 295 + }, + { + "epoch": 0.7989347536617842, + "grad_norm": 1.2341006994247437, + "learning_rate": 2.637674793414596e-05, + "loss": 0.6191, + "step": 300 + }, + { + "epoch": 0.8122503328894807, + "grad_norm": 1.2045552730560303, + "learning_rate": 2.6232148898937223e-05, + "loss": 0.5604, + "step": 305 + }, + { + "epoch": 0.8255659121171771, + "grad_norm": 1.2406785488128662, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.543, + "step": 310 + }, + { + "epoch": 0.8388814913448736, + "grad_norm": 1.4761314392089844, + "learning_rate": 2.5935734136139407e-05, + "loss": 0.5132, + "step": 315 + }, + { + "epoch": 0.8521970705725699, + "grad_norm": 1.3428096771240234, + "learning_rate": 2.5783982169218125e-05, + "loss": 0.5127, + "step": 320 + }, + { + "epoch": 0.8655126498002663, + "grad_norm": 1.312095284461975, + "learning_rate": 2.5629910500342424e-05, + "loss": 0.5939, + "step": 325 + }, + { + "epoch": 0.8788282290279628, + "grad_norm": 1.1822888851165771, + "learning_rate": 2.5473552271291092e-05, + "loss": 0.5529, + "step": 330 + }, + { + "epoch": 0.8921438082556591, + "grad_norm": 1.3029779195785522, + "learning_rate": 2.531494111569629e-05, + "loss": 0.5451, + "step": 335 + }, + { + "epoch": 0.9054593874833555, + "grad_norm": 1.2705796957015991, + "learning_rate": 2.5154111151808752e-05, + "loss": 0.5178, + "step": 340 + }, + { + "epoch": 0.918774966711052, + "grad_norm": 1.2116405963897705, + "learning_rate": 2.4991096975158757e-05, + "loss": 0.5338, + "step": 345 + }, + { + "epoch": 0.9320905459387483, + "grad_norm": 1.2833672761917114, + "learning_rate": 2.4825933651114375e-05, + "loss": 0.499, + "step": 350 + }, + { + "epoch": 0.9454061251664447, + "grad_norm": 1.232753872871399, + "learning_rate": 2.4658656707338733e-05, + "loss": 0.4888, + "step": 355 + }, + { + "epoch": 0.9587217043941412, + "grad_norm": 1.383094072341919, + "learning_rate": 2.4489302126147768e-05, + "loss": 0.5248, + "step": 360 + }, + { + "epoch": 0.9720372836218375, + "grad_norm": 1.3197346925735474, + "learning_rate": 2.431790633677019e-05, + "loss": 0.4644, + "step": 365 + }, + { + "epoch": 0.9853528628495339, + "grad_norm": 1.371936321258545, + "learning_rate": 2.414450620751136e-05, + "loss": 0.5205, + "step": 370 + }, + { + "epoch": 0.9986684420772304, + "grad_norm": 1.4124171733856201, + "learning_rate": 2.396913903782268e-05, + "loss": 0.4547, + "step": 375 + }, + { + "epoch": 1.0106524633821572, + "grad_norm": 1.3029264211654663, + "learning_rate": 2.379184255027822e-05, + "loss": 0.3853, + "step": 380 + }, + { + "epoch": 1.0239680426098536, + "grad_norm": 1.1009902954101562, + "learning_rate": 2.361265488246039e-05, + "loss": 0.4146, + "step": 385 + }, + { + "epoch": 1.0372836218375499, + "grad_norm": 1.3049861192703247, + "learning_rate": 2.3431614578756304e-05, + "loss": 0.3968, + "step": 390 + }, + { + "epoch": 1.0505992010652463, + "grad_norm": 1.2371097803115845, + "learning_rate": 2.3248760582066605e-05, + "loss": 0.3887, + "step": 395 + }, + { + "epoch": 1.0639147802929427, + "grad_norm": 1.2230571508407593, + "learning_rate": 2.306413222542866e-05, + "loss": 0.4184, + "step": 400 + }, + { + "epoch": 1.0772303595206392, + "grad_norm": 1.4181257486343384, + "learning_rate": 2.287776922355573e-05, + "loss": 0.4056, + "step": 405 + }, + { + "epoch": 1.0905459387483356, + "grad_norm": 1.535650372505188, + "learning_rate": 2.268971166429412e-05, + "loss": 0.3641, + "step": 410 + }, + { + "epoch": 1.103861517976032, + "grad_norm": 1.2901872396469116, + "learning_rate": 2.25e-05, + "loss": 0.4172, + "step": 415 + }, + { + "epoch": 1.1171770972037283, + "grad_norm": 1.3790102005004883, + "learning_rate": 2.2308675038837887e-05, + "loss": 0.3815, + "step": 420 + }, + { + "epoch": 1.1304926764314247, + "grad_norm": 1.2235584259033203, + "learning_rate": 2.2115777936002533e-05, + "loss": 0.3639, + "step": 425 + }, + { + "epoch": 1.1438082556591211, + "grad_norm": 1.3056392669677734, + "learning_rate": 2.192135018486618e-05, + "loss": 0.3979, + "step": 430 + }, + { + "epoch": 1.1571238348868176, + "grad_norm": 1.2899960279464722, + "learning_rate": 2.172543360805308e-05, + "loss": 0.3711, + "step": 435 + }, + { + "epoch": 1.170439414114514, + "grad_norm": 1.3279004096984863, + "learning_rate": 2.152807034844322e-05, + "loss": 0.4008, + "step": 440 + }, + { + "epoch": 1.1837549933422105, + "grad_norm": 1.3811602592468262, + "learning_rate": 2.1329302860107065e-05, + "loss": 0.3528, + "step": 445 + }, + { + "epoch": 1.1970705725699067, + "grad_norm": 1.1053953170776367, + "learning_rate": 2.1129173899173474e-05, + "loss": 0.3233, + "step": 450 + }, + { + "epoch": 1.2103861517976031, + "grad_norm": 1.2208783626556396, + "learning_rate": 2.0927726514632557e-05, + "loss": 0.3379, + "step": 455 + }, + { + "epoch": 1.2237017310252996, + "grad_norm": 1.2656804323196411, + "learning_rate": 2.072500403907559e-05, + "loss": 0.3327, + "step": 460 + }, + { + "epoch": 1.237017310252996, + "grad_norm": 1.2728395462036133, + "learning_rate": 2.0521050079373895e-05, + "loss": 0.3149, + "step": 465 + }, + { + "epoch": 1.2503328894806924, + "grad_norm": 1.0987460613250732, + "learning_rate": 2.0315908507298713e-05, + "loss": 0.3005, + "step": 470 + }, + { + "epoch": 1.2636484687083889, + "grad_norm": 1.331497311592102, + "learning_rate": 2.0109623450084154e-05, + "loss": 0.3203, + "step": 475 + }, + { + "epoch": 1.2769640479360853, + "grad_norm": 1.41068696975708, + "learning_rate": 1.990223928093511e-05, + "loss": 0.333, + "step": 480 + }, + { + "epoch": 1.2902796271637818, + "grad_norm": 1.4404975175857544, + "learning_rate": 1.9693800609482318e-05, + "loss": 0.3511, + "step": 485 + }, + { + "epoch": 1.303595206391478, + "grad_norm": 1.4686483144760132, + "learning_rate": 1.9484352272186555e-05, + "loss": 0.3122, + "step": 490 + }, + { + "epoch": 1.3169107856191744, + "grad_norm": 1.2376337051391602, + "learning_rate": 1.9273939322694035e-05, + "loss": 0.3119, + "step": 495 + }, + { + "epoch": 1.3302263648468708, + "grad_norm": 1.226077914237976, + "learning_rate": 1.906260702214508e-05, + "loss": 0.3495, + "step": 500 + }, + { + "epoch": 1.3435419440745673, + "grad_norm": 1.406785488128662, + "learning_rate": 1.8850400829438157e-05, + "loss": 0.2923, + "step": 505 + }, + { + "epoch": 1.3568575233022637, + "grad_norm": 1.2106208801269531, + "learning_rate": 1.8637366391451414e-05, + "loss": 0.3207, + "step": 510 + }, + { + "epoch": 1.37017310252996, + "grad_norm": 1.4816882610321045, + "learning_rate": 1.842354953322373e-05, + "loss": 0.303, + "step": 515 + }, + { + "epoch": 1.3834886817576564, + "grad_norm": 1.2325618267059326, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.3246, + "step": 520 + }, + { + "epoch": 1.3968042609853528, + "grad_norm": 1.360669493675232, + "learning_rate": 1.7993752687825003e-05, + "loss": 0.301, + "step": 525 + }, + { + "epoch": 1.4101198402130493, + "grad_norm": 1.3099137544631958, + "learning_rate": 1.777786515264123e-05, + "loss": 0.2684, + "step": 530 + }, + { + "epoch": 1.4234354194407457, + "grad_norm": 1.3373115062713623, + "learning_rate": 1.7561380081304063e-05, + "loss": 0.258, + "step": 535 + }, + { + "epoch": 1.4367509986684421, + "grad_norm": 1.354023814201355, + "learning_rate": 1.7344344041105177e-05, + "loss": 0.2596, + "step": 540 + }, + { + "epoch": 1.4500665778961386, + "grad_norm": 1.3495343923568726, + "learning_rate": 1.7126803717853086e-05, + "loss": 0.257, + "step": 545 + }, + { + "epoch": 1.463382157123835, + "grad_norm": 1.300793170928955, + "learning_rate": 1.6908805905830752e-05, + "loss": 0.2856, + "step": 550 + }, + { + "epoch": 1.4766977363515312, + "grad_norm": 1.3133409023284912, + "learning_rate": 1.6690397497729818e-05, + "loss": 0.2762, + "step": 555 + }, + { + "epoch": 1.4900133155792277, + "grad_norm": 1.242966890335083, + "learning_rate": 1.647162547456372e-05, + "loss": 0.2542, + "step": 560 + }, + { + "epoch": 1.503328894806924, + "grad_norm": 1.2402775287628174, + "learning_rate": 1.6252536895561754e-05, + "loss": 0.2791, + "step": 565 + }, + { + "epoch": 1.5166444740346205, + "grad_norm": 1.2238905429840088, + "learning_rate": 1.6033178888046368e-05, + "loss": 0.2651, + "step": 570 + }, + { + "epoch": 1.5299600532623168, + "grad_norm": 1.230895757675171, + "learning_rate": 1.5813598637295767e-05, + "loss": 0.2231, + "step": 575 + }, + { + "epoch": 1.5432756324900132, + "grad_norm": 1.4458600282669067, + "learning_rate": 1.5593843376394043e-05, + "loss": 0.261, + "step": 580 + }, + { + "epoch": 1.5565912117177096, + "grad_norm": 1.2178317308425903, + "learning_rate": 1.5373960376071095e-05, + "loss": 0.2375, + "step": 585 + }, + { + "epoch": 1.569906790945406, + "grad_norm": 1.3018031120300293, + "learning_rate": 1.515399693453435e-05, + "loss": 0.2423, + "step": 590 + }, + { + "epoch": 1.5832223701731025, + "grad_norm": 1.333503007888794, + "learning_rate": 1.493400036729465e-05, + "loss": 0.2499, + "step": 595 + }, + { + "epoch": 1.596537949400799, + "grad_norm": 1.3939979076385498, + "learning_rate": 1.4714017996988384e-05, + "loss": 0.2814, + "step": 600 + }, + { + "epoch": 1.6098535286284954, + "grad_norm": 1.2377736568450928, + "learning_rate": 1.4494097143198083e-05, + "loss": 0.2023, + "step": 605 + }, + { + "epoch": 1.6231691078561918, + "grad_norm": 1.3605191707611084, + "learning_rate": 1.4274285112273701e-05, + "loss": 0.2106, + "step": 610 + }, + { + "epoch": 1.6364846870838883, + "grad_norm": 1.2596324682235718, + "learning_rate": 1.4054629187156702e-05, + "loss": 0.2083, + "step": 615 + }, + { + "epoch": 1.6498002663115847, + "grad_norm": 1.1686816215515137, + "learning_rate": 1.3835176617209241e-05, + "loss": 0.2255, + "step": 620 + }, + { + "epoch": 1.663115845539281, + "grad_norm": 1.178421139717102, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.2138, + "step": 625 + }, + { + "epoch": 1.6764314247669774, + "grad_norm": 1.3646788597106934, + "learning_rate": 1.3397070311402377e-05, + "loss": 0.22, + "step": 630 + }, + { + "epoch": 1.6897470039946738, + "grad_norm": 1.2379239797592163, + "learning_rate": 1.3178510814947112e-05, + "loss": 0.2023, + "step": 635 + }, + { + "epoch": 1.70306258322237, + "grad_norm": 1.2092359066009521, + "learning_rate": 1.296034313219816e-05, + "loss": 0.2242, + "step": 640 + }, + { + "epoch": 1.7163781624500665, + "grad_norm": 1.3450614213943481, + "learning_rate": 1.2742614192387417e-05, + "loss": 0.2105, + "step": 645 + }, + { + "epoch": 1.729693741677763, + "grad_norm": 1.4856157302856445, + "learning_rate": 1.2525370830370447e-05, + "loss": 0.1827, + "step": 650 + }, + { + "epoch": 1.7430093209054593, + "grad_norm": 1.1683028936386108, + "learning_rate": 1.2308659776551985e-05, + "loss": 0.1984, + "step": 655 + }, + { + "epoch": 1.7563249001331558, + "grad_norm": 1.1432174444198608, + "learning_rate": 1.209252764683395e-05, + "loss": 0.1906, + "step": 660 + }, + { + "epoch": 1.7696404793608522, + "grad_norm": 1.423350214958191, + "learning_rate": 1.1877020932588067e-05, + "loss": 0.2181, + "step": 665 + }, + { + "epoch": 1.7829560585885487, + "grad_norm": 1.0758821964263916, + "learning_rate": 1.1662185990655285e-05, + "loss": 0.1891, + "step": 670 + }, + { + "epoch": 1.796271637816245, + "grad_norm": 1.1815426349639893, + "learning_rate": 1.1448069033374135e-05, + "loss": 0.1805, + "step": 675 + }, + { + "epoch": 1.8095872170439415, + "grad_norm": 1.2159985303878784, + "learning_rate": 1.1234716118640149e-05, + "loss": 0.2081, + "step": 680 + }, + { + "epoch": 1.822902796271638, + "grad_norm": 1.138628602027893, + "learning_rate": 1.1022173139998556e-05, + "loss": 0.1975, + "step": 685 + }, + { + "epoch": 1.8362183754993342, + "grad_norm": 1.2846992015838623, + "learning_rate": 1.0810485816772251e-05, + "loss": 0.1803, + "step": 690 + }, + { + "epoch": 1.8495339547270306, + "grad_norm": 1.148303747177124, + "learning_rate": 1.0599699684227313e-05, + "loss": 0.1773, + "step": 695 + }, + { + "epoch": 1.862849533954727, + "grad_norm": 1.220503330230713, + "learning_rate": 1.0389860083778056e-05, + "loss": 0.1692, + "step": 700 + }, + { + "epoch": 1.8761651131824233, + "grad_norm": 1.1492780447006226, + "learning_rate": 1.0181012153233851e-05, + "loss": 0.188, + "step": 705 + }, + { + "epoch": 1.8894806924101197, + "grad_norm": 1.0532795190811157, + "learning_rate": 9.973200817089655e-06, + "loss": 0.1701, + "step": 710 + }, + { + "epoch": 1.9027962716378162, + "grad_norm": 1.3459635972976685, + "learning_rate": 9.7664707768625e-06, + "loss": 0.1512, + "step": 715 + }, + { + "epoch": 1.9161118508655126, + "grad_norm": 1.3046966791152954, + "learning_rate": 9.560866501475913e-06, + "loss": 0.1711, + "step": 720 + }, + { + "epoch": 1.929427430093209, + "grad_norm": 1.144827127456665, + "learning_rate": 9.35643221769436e-06, + "loss": 0.1944, + "step": 725 + }, + { + "epoch": 1.9427430093209055, + "grad_norm": 1.218716025352478, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1967, + "step": 730 + }, + { + "epoch": 1.956058588548602, + "grad_norm": 1.1275371313095093, + "learning_rate": 8.951249264182403e-06, + "loss": 0.1749, + "step": 735 + }, + { + "epoch": 1.9693741677762984, + "grad_norm": 1.0788766145706177, + "learning_rate": 8.750587751837313e-06, + "loss": 0.18, + "step": 740 + }, + { + "epoch": 1.9826897470039948, + "grad_norm": 1.0834859609603882, + "learning_rate": 8.551270527119784e-06, + "loss": 0.1664, + "step": 745 + }, + { + "epoch": 1.996005326231691, + "grad_norm": 1.251931071281433, + "learning_rate": 8.35334046441041e-06, + "loss": 0.1838, + "step": 750 + }, + { + "epoch": 2.007989347536618, + "grad_norm": 1.1231558322906494, + "learning_rate": 8.156840139702554e-06, + "loss": 0.1551, + "step": 755 + }, + { + "epoch": 2.0213049267643144, + "grad_norm": 1.1553610563278198, + "learning_rate": 7.961811821444008e-06, + "loss": 0.1223, + "step": 760 + }, + { + "epoch": 2.034620505992011, + "grad_norm": 1.0494449138641357, + "learning_rate": 7.768297461444766e-06, + "loss": 0.1369, + "step": 765 + }, + { + "epoch": 2.0479360852197073, + "grad_norm": 1.1568539142608643, + "learning_rate": 7.576338685852955e-06, + "loss": 0.1178, + "step": 770 + }, + { + "epoch": 2.0612516644474033, + "grad_norm": 0.9905392527580261, + "learning_rate": 7.385976786200765e-06, + "loss": 0.1229, + "step": 775 + }, + { + "epoch": 2.0745672436750997, + "grad_norm": 1.1538437604904175, + "learning_rate": 7.197252710522395e-06, + "loss": 0.1416, + "step": 780 + }, + { + "epoch": 2.087882822902796, + "grad_norm": 0.8637592792510986, + "learning_rate": 7.010207054545873e-06, + "loss": 0.1113, + "step": 785 + }, + { + "epoch": 2.1011984021304926, + "grad_norm": 1.1173475980758667, + "learning_rate": 6.8248800529606604e-06, + "loss": 0.1132, + "step": 790 + }, + { + "epoch": 2.114513981358189, + "grad_norm": 0.8932748436927795, + "learning_rate": 6.641311570762918e-06, + "loss": 0.1271, + "step": 795 + }, + { + "epoch": 2.1278295605858855, + "grad_norm": 1.0550264120101929, + "learning_rate": 6.4595410946803e-06, + "loss": 0.1243, + "step": 800 + }, + { + "epoch": 2.141145139813582, + "grad_norm": 0.9867209196090698, + "learning_rate": 6.2796077246781046e-06, + "loss": 0.1151, + "step": 805 + }, + { + "epoch": 2.1544607190412783, + "grad_norm": 1.1324623823165894, + "learning_rate": 6.1015501655486365e-06, + "loss": 0.1283, + "step": 810 + }, + { + "epoch": 2.1677762982689748, + "grad_norm": 1.1827080249786377, + "learning_rate": 5.925406718585552e-06, + "loss": 0.1241, + "step": 815 + }, + { + "epoch": 2.181091877496671, + "grad_norm": 1.7942860126495361, + "learning_rate": 5.751215273345036e-06, + "loss": 0.1228, + "step": 820 + }, + { + "epoch": 2.1944074567243677, + "grad_norm": 0.9133052825927734, + "learning_rate": 5.5790132994954935e-06, + "loss": 0.1037, + "step": 825 + }, + { + "epoch": 2.207723035952064, + "grad_norm": 1.0704667568206787, + "learning_rate": 5.408837838757588e-06, + "loss": 0.1075, + "step": 830 + }, + { + "epoch": 2.2210386151797605, + "grad_norm": 0.8421687483787537, + "learning_rate": 5.240725496936373e-06, + "loss": 0.111, + "step": 835 + }, + { + "epoch": 2.2343541944074565, + "grad_norm": 1.0829740762710571, + "learning_rate": 5.0747124360471125e-06, + "loss": 0.0893, + "step": 840 + }, + { + "epoch": 2.247669773635153, + "grad_norm": 1.2417296171188354, + "learning_rate": 4.910834366536631e-06, + "loss": 0.1155, + "step": 845 + }, + { + "epoch": 2.2609853528628494, + "grad_norm": 0.9977407455444336, + "learning_rate": 4.74912653960177e-06, + "loss": 0.0936, + "step": 850 + }, + { + "epoch": 2.274300932090546, + "grad_norm": 0.9948540329933167, + "learning_rate": 4.589623739606625e-06, + "loss": 0.1142, + "step": 855 + }, + { + "epoch": 2.2876165113182423, + "grad_norm": 0.7560556530952454, + "learning_rate": 4.4323602766002165e-06, + "loss": 0.1072, + "step": 860 + }, + { + "epoch": 2.3009320905459387, + "grad_norm": 1.011824369430542, + "learning_rate": 4.277369978936188e-06, + "loss": 0.1063, + "step": 865 + }, + { + "epoch": 2.314247669773635, + "grad_norm": 1.1578537225723267, + "learning_rate": 4.1246861859961114e-06, + "loss": 0.1224, + "step": 870 + }, + { + "epoch": 2.3275632490013316, + "grad_norm": 1.2085102796554565, + "learning_rate": 3.974341741017978e-06, + "loss": 0.148, + "step": 875 + }, + { + "epoch": 2.340878828229028, + "grad_norm": 0.9562546610832214, + "learning_rate": 3.826368984031414e-06, + "loss": 0.1067, + "step": 880 + }, + { + "epoch": 2.3541944074567245, + "grad_norm": 0.974391520023346, + "learning_rate": 3.6807997449011426e-06, + "loss": 0.101, + "step": 885 + }, + { + "epoch": 2.367509986684421, + "grad_norm": 1.2590659856796265, + "learning_rate": 3.5376653364801703e-06, + "loss": 0.1052, + "step": 890 + }, + { + "epoch": 2.3808255659121174, + "grad_norm": 0.9821212291717529, + "learning_rate": 3.3969965478742038e-06, + "loss": 0.1041, + "step": 895 + }, + { + "epoch": 2.3941411451398134, + "grad_norm": 0.8471741676330566, + "learning_rate": 3.258823637818722e-06, + "loss": 0.1122, + "step": 900 + }, + { + "epoch": 2.40745672436751, + "grad_norm": 0.9534651637077332, + "learning_rate": 3.123176328170131e-06, + "loss": 0.0963, + "step": 905 + }, + { + "epoch": 2.4207723035952062, + "grad_norm": 1.1509631872177124, + "learning_rate": 2.990083797512401e-06, + "loss": 0.105, + "step": 910 + }, + { + "epoch": 2.4340878828229027, + "grad_norm": 0.8862387537956238, + "learning_rate": 2.8595746748805805e-06, + "loss": 0.0964, + "step": 915 + }, + { + "epoch": 2.447403462050599, + "grad_norm": 1.1030343770980835, + "learning_rate": 2.7316770336025166e-06, + "loss": 0.1066, + "step": 920 + }, + { + "epoch": 2.4607190412782955, + "grad_norm": 0.7888065576553345, + "learning_rate": 2.60641838526008e-06, + "loss": 0.0931, + "step": 925 + }, + { + "epoch": 2.474034620505992, + "grad_norm": 0.7563580870628357, + "learning_rate": 2.483825673771279e-06, + "loss": 0.0968, + "step": 930 + }, + { + "epoch": 2.4873501997336884, + "grad_norm": 0.8203815817832947, + "learning_rate": 2.363925269594449e-06, + "loss": 0.0873, + "step": 935 + }, + { + "epoch": 2.500665778961385, + "grad_norm": 0.8811711072921753, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.0879, + "step": 940 + }, + { + "epoch": 2.5139813581890813, + "grad_norm": 0.7034832835197449, + "learning_rate": 2.1323039638015024e-06, + "loss": 0.1073, + "step": 945 + }, + { + "epoch": 2.5272969374167777, + "grad_norm": 0.8818060755729675, + "learning_rate": 2.020632885375684e-06, + "loss": 0.0974, + "step": 950 + }, + { + "epoch": 2.540612516644474, + "grad_norm": 0.8902575373649597, + "learning_rate": 1.9117537499251416e-06, + "loss": 0.0981, + "step": 955 + }, + { + "epoch": 2.5539280958721706, + "grad_norm": 0.9755932092666626, + "learning_rate": 1.8056899780323016e-06, + "loss": 0.0973, + "step": 960 + }, + { + "epoch": 2.567243675099867, + "grad_norm": 0.8470637798309326, + "learning_rate": 1.7024643846772981e-06, + "loss": 0.0896, + "step": 965 + }, + { + "epoch": 2.5805592543275635, + "grad_norm": 0.6837173104286194, + "learning_rate": 1.6020991743303264e-06, + "loss": 0.0812, + "step": 970 + }, + { + "epoch": 2.5938748335552595, + "grad_norm": 0.858142077922821, + "learning_rate": 1.5046159361753226e-06, + "loss": 0.0922, + "step": 975 + }, + { + "epoch": 2.607190412782956, + "grad_norm": 0.7120624780654907, + "learning_rate": 1.4100356394659863e-06, + "loss": 0.0993, + "step": 980 + }, + { + "epoch": 2.6205059920106524, + "grad_norm": 0.8150039315223694, + "learning_rate": 1.318378629015184e-06, + "loss": 0.1033, + "step": 985 + }, + { + "epoch": 2.633821571238349, + "grad_norm": 0.8152164220809937, + "learning_rate": 1.229664620818633e-06, + "loss": 0.0974, + "step": 990 + }, + { + "epoch": 2.6471371504660453, + "grad_norm": 0.8708423972129822, + "learning_rate": 1.1439126978138769e-06, + "loss": 0.0909, + "step": 995 + }, + { + "epoch": 2.6604527296937417, + "grad_norm": 0.7374972701072693, + "learning_rate": 1.0611413057754221e-06, + "loss": 0.0877, + "step": 1000 + }, + { + "epoch": 2.673768308921438, + "grad_norm": 0.9587220549583435, + "learning_rate": 9.813682493469396e-07, + "loss": 0.0992, + "step": 1005 + }, + { + "epoch": 2.6870838881491346, + "grad_norm": 0.749595582485199, + "learning_rate": 9.046106882113753e-07, + "loss": 0.0976, + "step": 1010 + }, + { + "epoch": 2.700399467376831, + "grad_norm": 0.7693611979484558, + "learning_rate": 8.308851333997918e-07, + "loss": 0.0997, + "step": 1015 + }, + { + "epoch": 2.7137150466045274, + "grad_norm": 0.7854838967323303, + "learning_rate": 7.602074437397455e-07, + "loss": 0.0867, + "step": 1020 + }, + { + "epoch": 2.7270306258322234, + "grad_norm": 0.7105079293251038, + "learning_rate": 6.925928224439532e-07, + "loss": 0.089, + "step": 1025 + }, + { + "epoch": 2.74034620505992, + "grad_norm": 0.8280924558639526, + "learning_rate": 6.280558138399805e-07, + "loss": 0.0995, + "step": 1030 + }, + { + "epoch": 2.7536617842876163, + "grad_norm": 0.5871623754501343, + "learning_rate": 5.666103002416762e-07, + "loss": 0.0768, + "step": 1035 + }, + { + "epoch": 2.7669773635153128, + "grad_norm": 0.7730492353439331, + "learning_rate": 5.082694989629916e-07, + "loss": 0.0804, + "step": 1040 + }, + { + "epoch": 2.780292942743009, + "grad_norm": 0.8369763493537903, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.0937, + "step": 1045 + }, + { + "epoch": 2.7936085219707056, + "grad_norm": 0.8297663331031799, + "learning_rate": 4.0095156070571513e-07, + "loss": 0.0819, + "step": 1050 + }, + { + "epoch": 2.806924101198402, + "grad_norm": 0.7614980340003967, + "learning_rate": 3.5199750848627753e-07, + "loss": 0.0841, + "step": 1055 + }, + { + "epoch": 2.8202396804260985, + "grad_norm": 0.7854995131492615, + "learning_rate": 3.0619433313909706e-07, + "loss": 0.1068, + "step": 1060 + }, + { + "epoch": 2.833555259653795, + "grad_norm": 0.7981512546539307, + "learning_rate": 2.635518872134185e-07, + "loss": 0.0991, + "step": 1065 + }, + { + "epoch": 2.8468708388814914, + "grad_norm": 0.8420575857162476, + "learning_rate": 2.2407934336583446e-07, + "loss": 0.0858, + "step": 1070 + }, + { + "epoch": 2.860186418109188, + "grad_norm": 0.777001142501831, + "learning_rate": 1.8778519238719204e-07, + "loss": 0.0893, + "step": 1075 + }, + { + "epoch": 2.8735019973368843, + "grad_norm": 0.7179780006408691, + "learning_rate": 1.5467724137617046e-07, + "loss": 0.0849, + "step": 1080 + }, + { + "epoch": 2.8868175765645807, + "grad_norm": 0.9118334650993347, + "learning_rate": 1.2476261205992934e-07, + "loss": 0.0961, + "step": 1085 + }, + { + "epoch": 2.900133155792277, + "grad_norm": 0.6071268320083618, + "learning_rate": 9.804773926217092e-08, + "loss": 0.0706, + "step": 1090 + }, + { + "epoch": 2.9134487350199736, + "grad_norm": 0.7746210098266602, + "learning_rate": 7.453836951897885e-08, + "loss": 0.0788, + "step": 1095 + }, + { + "epoch": 2.92676431424767, + "grad_norm": 0.5561127662658691, + "learning_rate": 5.4239559842695354e-08, + "loss": 0.0883, + "step": 1100 + }, + { + "epoch": 2.940079893475366, + "grad_norm": 0.7587743997573853, + "learning_rate": 3.715567663412966e-08, + "loss": 0.0905, + "step": 1105 + }, + { + "epoch": 2.9533954727030625, + "grad_norm": 0.8303349018096924, + "learning_rate": 2.3290394743317732e-08, + "loss": 0.0769, + "step": 1110 + }, + { + "epoch": 2.966711051930759, + "grad_norm": 0.6291532516479492, + "learning_rate": 1.2646696679042835e-08, + "loss": 0.0846, + "step": 1115 + }, + { + "epoch": 2.9800266311584553, + "grad_norm": 0.6718606948852539, + "learning_rate": 5.2268719672671215e-09, + "loss": 0.0863, + "step": 1120 + }, + { + "epoch": 2.993342210386152, + "grad_norm": 0.8351708650588989, + "learning_rate": 1.0325166586572233e-09, + "loss": 0.0838, + "step": 1125 + }, + { + "epoch": 3.0, + "step": 1128, + "total_flos": 1.4414949418081976e+18, + "train_loss": 0.43326316522896713, + "train_runtime": 1137.0408, + "train_samples_per_second": 31.698, + "train_steps_per_second": 0.992 + } + ], + "logging_steps": 5, + "max_steps": 1128, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4414949418081976e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a23f1753536bfa551bf79f0c2d63a11ff3b631d4 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/4_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:decda3b19b748e050db974577149db91f4a292d506fe0734ba550c7e476f4e00 +size 8273 diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3c30b0b280ce4d22ff226662c6a9696fe03552ef --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 5_128_e3_3e-5 + results: [] +--- + + + +# 5_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..52f3f8d82a298ba93812cc495dd9c35f6c096bcb --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "o_proj", + "q_proj", + "up_proj", + "down_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f34a66d5679748c94ee40c432caff901d43504d --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e16811ea1610b13e2fb0da2ad2d48b4819aa2749b57e995cede4a36da78af58 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..14edd5d007b106ef2da7479d0e4ae47492fe7589 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.225514319484027e+18, + "train_loss": 0.422133161341743, + "train_runtime": 987.0743, + "train_samples": 10463, + "train_samples_per_second": 31.8, + "train_steps_per_second": 0.994 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..14edd5d007b106ef2da7479d0e4ae47492fe7589 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.225514319484027e+18, + "train_loss": 0.422133161341743, + "train_runtime": 987.0743, + "train_samples": 10463, + "train_samples_per_second": 31.8, + "train_steps_per_second": 0.994 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..20d23f8059c9faa949c20666afcd1c9ad3c97b33 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1415 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 981, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01529051987767584, + "grad_norm": 0.7324830889701843, + "learning_rate": 2.4000000000000003e-06, + "loss": 1.5402, + "step": 5 + }, + { + "epoch": 0.03058103975535168, + "grad_norm": 0.586818277835846, + "learning_rate": 5.4e-06, + "loss": 1.5341, + "step": 10 + }, + { + "epoch": 0.045871559633027525, + "grad_norm": 0.6613988280296326, + "learning_rate": 8.400000000000001e-06, + "loss": 1.5615, + "step": 15 + }, + { + "epoch": 0.06116207951070336, + "grad_norm": 0.5686705112457275, + "learning_rate": 1.1400000000000001e-05, + "loss": 1.5791, + "step": 20 + }, + { + "epoch": 0.0764525993883792, + "grad_norm": 0.559758186340332, + "learning_rate": 1.44e-05, + "loss": 1.5313, + "step": 25 + }, + { + "epoch": 0.09174311926605505, + "grad_norm": 0.5023041367530823, + "learning_rate": 1.74e-05, + "loss": 1.5419, + "step": 30 + }, + { + "epoch": 0.10703363914373089, + "grad_norm": 0.662456214427948, + "learning_rate": 2.04e-05, + "loss": 1.4723, + "step": 35 + }, + { + "epoch": 0.12232415902140673, + "grad_norm": 0.4583873748779297, + "learning_rate": 2.3400000000000003e-05, + "loss": 1.4454, + "step": 40 + }, + { + "epoch": 0.13761467889908258, + "grad_norm": 0.5301771759986877, + "learning_rate": 2.64e-05, + "loss": 1.4132, + "step": 45 + }, + { + "epoch": 0.1529051987767584, + "grad_norm": 0.5559477806091309, + "learning_rate": 2.94e-05, + "loss": 1.3457, + "step": 50 + }, + { + "epoch": 0.16819571865443425, + "grad_norm": 0.5550680756568909, + "learning_rate": 2.999863360886452e-05, + "loss": 1.3426, + "step": 55 + }, + { + "epoch": 0.1834862385321101, + "grad_norm": 0.5964385867118835, + "learning_rate": 2.99930830715182e-05, + "loss": 1.3104, + "step": 60 + }, + { + "epoch": 0.19877675840978593, + "grad_norm": 0.7314971089363098, + "learning_rate": 2.9983264567328756e-05, + "loss": 1.3721, + "step": 65 + }, + { + "epoch": 0.21406727828746178, + "grad_norm": 0.636846125125885, + "learning_rate": 2.9969180891255046e-05, + "loss": 1.2829, + "step": 70 + }, + { + "epoch": 0.22935779816513763, + "grad_norm": 0.6363294124603271, + "learning_rate": 2.9950836052389943e-05, + "loss": 1.3026, + "step": 75 + }, + { + "epoch": 0.24464831804281345, + "grad_norm": 0.6040486097335815, + "learning_rate": 2.9928235272819095e-05, + "loss": 1.1725, + "step": 80 + }, + { + "epoch": 0.2599388379204893, + "grad_norm": 0.6939135789871216, + "learning_rate": 2.9901384986134417e-05, + "loss": 1.2411, + "step": 85 + }, + { + "epoch": 0.27522935779816515, + "grad_norm": 0.8100473880767822, + "learning_rate": 2.9870292835602647e-05, + "loss": 1.1372, + "step": 90 + }, + { + "epoch": 0.290519877675841, + "grad_norm": 0.9033547043800354, + "learning_rate": 2.9834967671989633e-05, + "loss": 1.1511, + "step": 95 + }, + { + "epoch": 0.3058103975535168, + "grad_norm": 0.7998380064964294, + "learning_rate": 2.9795419551040836e-05, + "loss": 1.0738, + "step": 100 + }, + { + "epoch": 0.3211009174311927, + "grad_norm": 0.898135244846344, + "learning_rate": 2.9751659730618836e-05, + "loss": 1.0895, + "step": 105 + }, + { + "epoch": 0.3363914373088685, + "grad_norm": 0.7950589060783386, + "learning_rate": 2.9703700667498654e-05, + "loss": 1.114, + "step": 110 + }, + { + "epoch": 0.3516819571865443, + "grad_norm": 0.7623441219329834, + "learning_rate": 2.9651556013821764e-05, + "loss": 0.9774, + "step": 115 + }, + { + "epoch": 0.3669724770642202, + "grad_norm": 0.8223645687103271, + "learning_rate": 2.959524061320984e-05, + "loss": 0.9975, + "step": 120 + }, + { + "epoch": 0.382262996941896, + "grad_norm": 0.9481493234634399, + "learning_rate": 2.9534770496539352e-05, + "loss": 0.9652, + "step": 125 + }, + { + "epoch": 0.39755351681957185, + "grad_norm": 0.8759327530860901, + "learning_rate": 2.9470162877378167e-05, + "loss": 1.0058, + "step": 130 + }, + { + "epoch": 0.41284403669724773, + "grad_norm": 0.8592563271522522, + "learning_rate": 2.940143614708549e-05, + "loss": 0.9312, + "step": 135 + }, + { + "epoch": 0.42813455657492355, + "grad_norm": 0.9046728014945984, + "learning_rate": 2.9328609869576543e-05, + "loss": 0.9443, + "step": 140 + }, + { + "epoch": 0.4434250764525994, + "grad_norm": 0.9851959943771362, + "learning_rate": 2.9251704775753455e-05, + "loss": 0.9109, + "step": 145 + }, + { + "epoch": 0.45871559633027525, + "grad_norm": 0.9640716910362244, + "learning_rate": 2.9170742757603937e-05, + "loss": 0.8775, + "step": 150 + }, + { + "epoch": 0.4740061162079511, + "grad_norm": 1.2667337656021118, + "learning_rate": 2.908574686196949e-05, + "loss": 0.8285, + "step": 155 + }, + { + "epoch": 0.4892966360856269, + "grad_norm": 1.035611867904663, + "learning_rate": 2.8996741283984803e-05, + "loss": 0.9037, + "step": 160 + }, + { + "epoch": 0.5045871559633027, + "grad_norm": 1.0570251941680908, + "learning_rate": 2.8903751360190327e-05, + "loss": 0.8282, + "step": 165 + }, + { + "epoch": 0.5198776758409785, + "grad_norm": 0.9721060991287231, + "learning_rate": 2.8806803561319903e-05, + "loss": 0.8022, + "step": 170 + }, + { + "epoch": 0.5351681957186545, + "grad_norm": 1.1251496076583862, + "learning_rate": 2.8705925484765556e-05, + "loss": 0.8682, + "step": 175 + }, + { + "epoch": 0.5504587155963303, + "grad_norm": 1.2680888175964355, + "learning_rate": 2.8601145846721527e-05, + "loss": 0.7651, + "step": 180 + }, + { + "epoch": 0.5657492354740061, + "grad_norm": 1.1732733249664307, + "learning_rate": 2.84924944740099e-05, + "loss": 0.8191, + "step": 185 + }, + { + "epoch": 0.581039755351682, + "grad_norm": 1.1099917888641357, + "learning_rate": 2.8380002295589963e-05, + "loss": 0.8187, + "step": 190 + }, + { + "epoch": 0.5963302752293578, + "grad_norm": 1.0164685249328613, + "learning_rate": 2.8263701333753964e-05, + "loss": 0.7441, + "step": 195 + }, + { + "epoch": 0.6116207951070336, + "grad_norm": 1.2063839435577393, + "learning_rate": 2.8143624695011514e-05, + "loss": 0.7351, + "step": 200 + }, + { + "epoch": 0.6269113149847095, + "grad_norm": 1.1033843755722046, + "learning_rate": 2.801980656066545e-05, + "loss": 0.6785, + "step": 205 + }, + { + "epoch": 0.6422018348623854, + "grad_norm": 1.4292913675308228, + "learning_rate": 2.789228217708166e-05, + "loss": 0.671, + "step": 210 + }, + { + "epoch": 0.6574923547400612, + "grad_norm": 1.20248281955719, + "learning_rate": 2.776108784565583e-05, + "loss": 0.677, + "step": 215 + }, + { + "epoch": 0.672782874617737, + "grad_norm": 1.4572244882583618, + "learning_rate": 2.762626091247973e-05, + "loss": 0.7198, + "step": 220 + }, + { + "epoch": 0.6880733944954128, + "grad_norm": 1.088163137435913, + "learning_rate": 2.748783975771024e-05, + "loss": 0.6746, + "step": 225 + }, + { + "epoch": 0.7033639143730887, + "grad_norm": 1.1942415237426758, + "learning_rate": 2.734586378464395e-05, + "loss": 0.642, + "step": 230 + }, + { + "epoch": 0.7186544342507645, + "grad_norm": 1.2001384496688843, + "learning_rate": 2.7200373408500498e-05, + "loss": 0.6412, + "step": 235 + }, + { + "epoch": 0.7339449541284404, + "grad_norm": 1.193349838256836, + "learning_rate": 2.705141004491792e-05, + "loss": 0.7044, + "step": 240 + }, + { + "epoch": 0.7492354740061162, + "grad_norm": 1.5693448781967163, + "learning_rate": 2.689901609816313e-05, + "loss": 0.6148, + "step": 245 + }, + { + "epoch": 0.764525993883792, + "grad_norm": 1.1491652727127075, + "learning_rate": 2.6743234949061072e-05, + "loss": 0.5818, + "step": 250 + }, + { + "epoch": 0.7798165137614679, + "grad_norm": 1.2533149719238281, + "learning_rate": 2.658411094264577e-05, + "loss": 0.5827, + "step": 255 + }, + { + "epoch": 0.7951070336391437, + "grad_norm": 1.2426414489746094, + "learning_rate": 2.6421689375537015e-05, + "loss": 0.5601, + "step": 260 + }, + { + "epoch": 0.8103975535168195, + "grad_norm": 1.174207091331482, + "learning_rate": 2.6256016483046106e-05, + "loss": 0.5483, + "step": 265 + }, + { + "epoch": 0.8256880733944955, + "grad_norm": 1.154205560684204, + "learning_rate": 2.6087139426014373e-05, + "loss": 0.5935, + "step": 270 + }, + { + "epoch": 0.8409785932721713, + "grad_norm": 1.4638313055038452, + "learning_rate": 2.5915106277388293e-05, + "loss": 0.5647, + "step": 275 + }, + { + "epoch": 0.8562691131498471, + "grad_norm": 1.299365758895874, + "learning_rate": 2.573996600853492e-05, + "loss": 0.5025, + "step": 280 + }, + { + "epoch": 0.8715596330275229, + "grad_norm": 1.3994636535644531, + "learning_rate": 2.5561768475301556e-05, + "loss": 0.5255, + "step": 285 + }, + { + "epoch": 0.8868501529051988, + "grad_norm": 1.2511111497879028, + "learning_rate": 2.53805644038237e-05, + "loss": 0.5112, + "step": 290 + }, + { + "epoch": 0.9021406727828746, + "grad_norm": 1.2232036590576172, + "learning_rate": 2.519640537608521e-05, + "loss": 0.5039, + "step": 295 + }, + { + "epoch": 0.9174311926605505, + "grad_norm": 1.4130849838256836, + "learning_rate": 2.5009343815234845e-05, + "loss": 0.5179, + "step": 300 + }, + { + "epoch": 0.9327217125382263, + "grad_norm": 1.2880096435546875, + "learning_rate": 2.4819432970663375e-05, + "loss": 0.4666, + "step": 305 + }, + { + "epoch": 0.9480122324159022, + "grad_norm": 1.3711122274398804, + "learning_rate": 2.4626726902845477e-05, + "loss": 0.5012, + "step": 310 + }, + { + "epoch": 0.963302752293578, + "grad_norm": 1.2726308107376099, + "learning_rate": 2.4431280467950758e-05, + "loss": 0.4896, + "step": 315 + }, + { + "epoch": 0.9785932721712538, + "grad_norm": 1.1021482944488525, + "learning_rate": 2.423314930222828e-05, + "loss": 0.4979, + "step": 320 + }, + { + "epoch": 0.9938837920489296, + "grad_norm": 1.294688105583191, + "learning_rate": 2.4032389806168958e-05, + "loss": 0.4921, + "step": 325 + }, + { + "epoch": 1.0091743119266054, + "grad_norm": 1.2074092626571655, + "learning_rate": 2.3829059128450537e-05, + "loss": 0.4271, + "step": 330 + }, + { + "epoch": 1.0244648318042813, + "grad_norm": 1.2859591245651245, + "learning_rate": 2.362321514966945e-05, + "loss": 0.4395, + "step": 335 + }, + { + "epoch": 1.039755351681957, + "grad_norm": 1.2853814363479614, + "learning_rate": 2.3414916465864434e-05, + "loss": 0.3983, + "step": 340 + }, + { + "epoch": 1.0550458715596331, + "grad_norm": 1.549614429473877, + "learning_rate": 2.320422237183641e-05, + "loss": 0.394, + "step": 345 + }, + { + "epoch": 1.070336391437309, + "grad_norm": 1.290798306465149, + "learning_rate": 2.299119284426948e-05, + "loss": 0.3773, + "step": 350 + }, + { + "epoch": 1.0856269113149848, + "grad_norm": 1.3393418788909912, + "learning_rate": 2.277588852465788e-05, + "loss": 0.3306, + "step": 355 + }, + { + "epoch": 1.1009174311926606, + "grad_norm": 1.1729761362075806, + "learning_rate": 2.2558370702043535e-05, + "loss": 0.4179, + "step": 360 + }, + { + "epoch": 1.1162079510703364, + "grad_norm": 1.4420814514160156, + "learning_rate": 2.233870129556946e-05, + "loss": 0.3362, + "step": 365 + }, + { + "epoch": 1.1314984709480123, + "grad_norm": 1.3537983894348145, + "learning_rate": 2.211694283685364e-05, + "loss": 0.3565, + "step": 370 + }, + { + "epoch": 1.146788990825688, + "grad_norm": 1.2138665914535522, + "learning_rate": 2.1893158452188696e-05, + "loss": 0.3653, + "step": 375 + }, + { + "epoch": 1.162079510703364, + "grad_norm": 1.5690853595733643, + "learning_rate": 2.166741184457214e-05, + "loss": 0.3196, + "step": 380 + }, + { + "epoch": 1.1773700305810397, + "grad_norm": 1.279282569885254, + "learning_rate": 2.1439767275572558e-05, + "loss": 0.3342, + "step": 385 + }, + { + "epoch": 1.1926605504587156, + "grad_norm": 1.8452625274658203, + "learning_rate": 2.1210289547036784e-05, + "loss": 0.3422, + "step": 390 + }, + { + "epoch": 1.2079510703363914, + "grad_norm": 1.2902981042861938, + "learning_rate": 2.0979043982643194e-05, + "loss": 0.3017, + "step": 395 + }, + { + "epoch": 1.2232415902140672, + "grad_norm": 1.5250523090362549, + "learning_rate": 2.0746096409306568e-05, + "loss": 0.3049, + "step": 400 + }, + { + "epoch": 1.238532110091743, + "grad_norm": 1.194884181022644, + "learning_rate": 2.0511513138439597e-05, + "loss": 0.337, + "step": 405 + }, + { + "epoch": 1.2538226299694188, + "grad_norm": 1.427438735961914, + "learning_rate": 2.0275360947076547e-05, + "loss": 0.3278, + "step": 410 + }, + { + "epoch": 1.2691131498470947, + "grad_norm": 1.3427449464797974, + "learning_rate": 2.0037707058864343e-05, + "loss": 0.3017, + "step": 415 + }, + { + "epoch": 1.2844036697247707, + "grad_norm": 1.3093918561935425, + "learning_rate": 1.979861912492651e-05, + "loss": 0.2751, + "step": 420 + }, + { + "epoch": 1.2996941896024465, + "grad_norm": 1.371841311454773, + "learning_rate": 1.9558165204605473e-05, + "loss": 0.3233, + "step": 425 + }, + { + "epoch": 1.3149847094801224, + "grad_norm": 1.30031418800354, + "learning_rate": 1.9316413746088594e-05, + "loss": 0.3514, + "step": 430 + }, + { + "epoch": 1.3302752293577982, + "grad_norm": 1.304273247718811, + "learning_rate": 1.907343356692356e-05, + "loss": 0.3158, + "step": 435 + }, + { + "epoch": 1.345565749235474, + "grad_norm": 1.278242588043213, + "learning_rate": 1.8829293834428606e-05, + "loss": 0.314, + "step": 440 + }, + { + "epoch": 1.3608562691131498, + "grad_norm": 1.2268272638320923, + "learning_rate": 1.858406404600319e-05, + "loss": 0.2726, + "step": 445 + }, + { + "epoch": 1.3761467889908257, + "grad_norm": 1.3202054500579834, + "learning_rate": 1.8337814009344716e-05, + "loss": 0.2794, + "step": 450 + }, + { + "epoch": 1.3914373088685015, + "grad_norm": 1.3028744459152222, + "learning_rate": 1.8090613822576858e-05, + "loss": 0.2586, + "step": 455 + }, + { + "epoch": 1.4067278287461773, + "grad_norm": 1.2913042306900024, + "learning_rate": 1.7842533854295294e-05, + "loss": 0.2285, + "step": 460 + }, + { + "epoch": 1.4220183486238533, + "grad_norm": 1.261464238166809, + "learning_rate": 1.759364472353643e-05, + "loss": 0.3082, + "step": 465 + }, + { + "epoch": 1.4373088685015292, + "grad_norm": 1.312692642211914, + "learning_rate": 1.7344017279674774e-05, + "loss": 0.2465, + "step": 470 + }, + { + "epoch": 1.452599388379205, + "grad_norm": 1.29775869846344, + "learning_rate": 1.709372258225482e-05, + "loss": 0.2814, + "step": 475 + }, + { + "epoch": 1.4678899082568808, + "grad_norm": 1.2508231401443481, + "learning_rate": 1.684283188076304e-05, + "loss": 0.2849, + "step": 480 + }, + { + "epoch": 1.4831804281345566, + "grad_norm": 1.2533626556396484, + "learning_rate": 1.659141659434587e-05, + "loss": 0.2197, + "step": 485 + }, + { + "epoch": 1.4984709480122325, + "grad_norm": 1.6360969543457031, + "learning_rate": 1.633954829147932e-05, + "loss": 0.2541, + "step": 490 + }, + { + "epoch": 1.5137614678899083, + "grad_norm": 1.2090907096862793, + "learning_rate": 1.6087298669596162e-05, + "loss": 0.2427, + "step": 495 + }, + { + "epoch": 1.529051987767584, + "grad_norm": 1.3616001605987549, + "learning_rate": 1.583473953467634e-05, + "loss": 0.2168, + "step": 500 + }, + { + "epoch": 1.54434250764526, + "grad_norm": 1.5401846170425415, + "learning_rate": 1.558194278080652e-05, + "loss": 0.2463, + "step": 505 + }, + { + "epoch": 1.5596330275229358, + "grad_norm": 1.4477574825286865, + "learning_rate": 1.5328980369714554e-05, + "loss": 0.2159, + "step": 510 + }, + { + "epoch": 1.5749235474006116, + "grad_norm": 1.3912242650985718, + "learning_rate": 1.507592431028466e-05, + "loss": 0.2132, + "step": 515 + }, + { + "epoch": 1.5902140672782874, + "grad_norm": 1.3047600984573364, + "learning_rate": 1.4822846638059234e-05, + "loss": 0.2091, + "step": 520 + }, + { + "epoch": 1.6055045871559632, + "grad_norm": 1.2054026126861572, + "learning_rate": 1.4569819394733004e-05, + "loss": 0.2426, + "step": 525 + }, + { + "epoch": 1.620795107033639, + "grad_norm": 1.8151520490646362, + "learning_rate": 1.4316914607645509e-05, + "loss": 0.1967, + "step": 530 + }, + { + "epoch": 1.6360856269113149, + "grad_norm": 1.1745802164077759, + "learning_rate": 1.4064204269277572e-05, + "loss": 0.2098, + "step": 535 + }, + { + "epoch": 1.6513761467889907, + "grad_norm": 1.3514999151229858, + "learning_rate": 1.3811760316757804e-05, + "loss": 0.2016, + "step": 540 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 1.1854194402694702, + "learning_rate": 1.3559654611384774e-05, + "loss": 0.1887, + "step": 545 + }, + { + "epoch": 1.6819571865443423, + "grad_norm": 1.491900086402893, + "learning_rate": 1.330795891817082e-05, + "loss": 0.1884, + "step": 550 + }, + { + "epoch": 1.6972477064220184, + "grad_norm": 1.2474677562713623, + "learning_rate": 1.3056744885413216e-05, + "loss": 0.2011, + "step": 555 + }, + { + "epoch": 1.7125382262996942, + "grad_norm": 1.6224690675735474, + "learning_rate": 1.2806084024298648e-05, + "loss": 0.1906, + "step": 560 + }, + { + "epoch": 1.72782874617737, + "grad_norm": 1.2442169189453125, + "learning_rate": 1.2556047688546622e-05, + "loss": 0.1931, + "step": 565 + }, + { + "epoch": 1.7431192660550459, + "grad_norm": 1.8016092777252197, + "learning_rate": 1.230670705409777e-05, + "loss": 0.1919, + "step": 570 + }, + { + "epoch": 1.7584097859327217, + "grad_norm": 1.1365842819213867, + "learning_rate": 1.2058133098852735e-05, + "loss": 0.1582, + "step": 575 + }, + { + "epoch": 1.7737003058103975, + "grad_norm": 1.1120567321777344, + "learning_rate": 1.1810396582467407e-05, + "loss": 0.2026, + "step": 580 + }, + { + "epoch": 1.7889908256880735, + "grad_norm": 1.334220051765442, + "learning_rate": 1.156356802621035e-05, + "loss": 0.1952, + "step": 585 + }, + { + "epoch": 1.8042813455657494, + "grad_norm": 1.2245335578918457, + "learning_rate": 1.1317717692888014e-05, + "loss": 0.2095, + "step": 590 + }, + { + "epoch": 1.8195718654434252, + "grad_norm": 1.1530183553695679, + "learning_rate": 1.1072915566843582e-05, + "loss": 0.1878, + "step": 595 + }, + { + "epoch": 1.834862385321101, + "grad_norm": 1.2000569105148315, + "learning_rate": 1.0829231334035052e-05, + "loss": 0.1888, + "step": 600 + }, + { + "epoch": 1.8501529051987768, + "grad_norm": 1.022577166557312, + "learning_rate": 1.0586734362198242e-05, + "loss": 0.1573, + "step": 605 + }, + { + "epoch": 1.8654434250764527, + "grad_norm": 1.4470733404159546, + "learning_rate": 1.0345493681100449e-05, + "loss": 0.1631, + "step": 610 + }, + { + "epoch": 1.8807339449541285, + "grad_norm": 1.1706517934799194, + "learning_rate": 1.0105577962890235e-05, + "loss": 0.166, + "step": 615 + }, + { + "epoch": 1.8960244648318043, + "grad_norm": 1.1487064361572266, + "learning_rate": 9.867055502549072e-06, + "loss": 0.1642, + "step": 620 + }, + { + "epoch": 1.9113149847094801, + "grad_norm": 1.6226613521575928, + "learning_rate": 9.629994198450305e-06, + "loss": 0.1661, + "step": 625 + }, + { + "epoch": 1.926605504587156, + "grad_norm": 1.1016381978988647, + "learning_rate": 9.394461533031059e-06, + "loss": 0.1661, + "step": 630 + }, + { + "epoch": 1.9418960244648318, + "grad_norm": 1.1012557744979858, + "learning_rate": 9.160524553582518e-06, + "loss": 0.1487, + "step": 635 + }, + { + "epoch": 1.9571865443425076, + "grad_norm": 1.0710945129394531, + "learning_rate": 8.928249853164093e-06, + "loss": 0.1485, + "step": 640 + }, + { + "epoch": 1.9724770642201834, + "grad_norm": 1.1464319229125977, + "learning_rate": 8.697703551646874e-06, + "loss": 0.1483, + "step": 645 + }, + { + "epoch": 1.9877675840978593, + "grad_norm": 1.1444896459579468, + "learning_rate": 8.468951276891778e-06, + "loss": 0.1474, + "step": 650 + }, + { + "epoch": 2.003058103975535, + "grad_norm": 0.9902957081794739, + "learning_rate": 8.242058146067796e-06, + "loss": 0.1461, + "step": 655 + }, + { + "epoch": 2.018348623853211, + "grad_norm": 1.1535534858703613, + "learning_rate": 8.017088747115554e-06, + "loss": 0.1325, + "step": 660 + }, + { + "epoch": 2.0336391437308867, + "grad_norm": 1.163069248199463, + "learning_rate": 7.794107120361572e-06, + "loss": 0.1145, + "step": 665 + }, + { + "epoch": 2.0489296636085625, + "grad_norm": 0.9736145734786987, + "learning_rate": 7.573176740288397e-06, + "loss": 0.1234, + "step": 670 + }, + { + "epoch": 2.0642201834862384, + "grad_norm": 1.1557254791259766, + "learning_rate": 7.354360497465774e-06, + "loss": 0.1241, + "step": 675 + }, + { + "epoch": 2.079510703363914, + "grad_norm": 1.510737419128418, + "learning_rate": 7.137720680648107e-06, + "loss": 0.1257, + "step": 680 + }, + { + "epoch": 2.09480122324159, + "grad_norm": 0.8890921473503113, + "learning_rate": 6.923318959043171e-06, + "loss": 0.1133, + "step": 685 + }, + { + "epoch": 2.1100917431192663, + "grad_norm": 0.8376851677894592, + "learning_rate": 6.7112163647572e-06, + "loss": 0.1128, + "step": 690 + }, + { + "epoch": 2.1253822629969417, + "grad_norm": 1.2915374040603638, + "learning_rate": 6.50147327542137e-06, + "loss": 0.1198, + "step": 695 + }, + { + "epoch": 2.140672782874618, + "grad_norm": 1.1294937133789062, + "learning_rate": 6.294149397004524e-06, + "loss": 0.1162, + "step": 700 + }, + { + "epoch": 2.1559633027522938, + "grad_norm": 0.9501897692680359, + "learning_rate": 6.089303746817157e-06, + "loss": 0.1147, + "step": 705 + }, + { + "epoch": 2.1712538226299696, + "grad_norm": 1.0779762268066406, + "learning_rate": 5.886994636711396e-06, + "loss": 0.112, + "step": 710 + }, + { + "epoch": 2.1865443425076454, + "grad_norm": 0.956552267074585, + "learning_rate": 5.687279656481812e-06, + "loss": 0.1055, + "step": 715 + }, + { + "epoch": 2.2018348623853212, + "grad_norm": 1.168547511100769, + "learning_rate": 5.490215657471752e-06, + "loss": 0.1071, + "step": 720 + }, + { + "epoch": 2.217125382262997, + "grad_norm": 0.9875157475471497, + "learning_rate": 5.295858736389932e-06, + "loss": 0.0987, + "step": 725 + }, + { + "epoch": 2.232415902140673, + "grad_norm": 1.042636513710022, + "learning_rate": 5.104264219341793e-06, + "loss": 0.0976, + "step": 730 + }, + { + "epoch": 2.2477064220183487, + "grad_norm": 0.9446932673454285, + "learning_rate": 4.915486646080247e-06, + "loss": 0.104, + "step": 735 + }, + { + "epoch": 2.2629969418960245, + "grad_norm": 0.9467479586601257, + "learning_rate": 4.729579754480262e-06, + "loss": 0.1112, + "step": 740 + }, + { + "epoch": 2.2782874617737003, + "grad_norm": 0.803740382194519, + "learning_rate": 4.54659646524169e-06, + "loss": 0.1021, + "step": 745 + }, + { + "epoch": 2.293577981651376, + "grad_norm": 0.8813977837562561, + "learning_rate": 4.366588866824769e-06, + "loss": 0.1067, + "step": 750 + }, + { + "epoch": 2.308868501529052, + "grad_norm": 0.8494495749473572, + "learning_rate": 4.1896082006224716e-06, + "loss": 0.1036, + "step": 755 + }, + { + "epoch": 2.324159021406728, + "grad_norm": 0.849310576915741, + "learning_rate": 4.015704846374018e-06, + "loss": 0.0918, + "step": 760 + }, + { + "epoch": 2.3394495412844036, + "grad_norm": 1.1873241662979126, + "learning_rate": 3.844928307823655e-06, + "loss": 0.0992, + "step": 765 + }, + { + "epoch": 2.3547400611620795, + "grad_norm": 0.8046991229057312, + "learning_rate": 3.6773271986287993e-06, + "loss": 0.0943, + "step": 770 + }, + { + "epoch": 2.3700305810397553, + "grad_norm": 1.065780758857727, + "learning_rate": 3.5129492285215713e-06, + "loss": 0.0983, + "step": 775 + }, + { + "epoch": 2.385321100917431, + "grad_norm": 0.9721361994743347, + "learning_rate": 3.3518411897276064e-06, + "loss": 0.0877, + "step": 780 + }, + { + "epoch": 2.400611620795107, + "grad_norm": 0.7490877509117126, + "learning_rate": 3.194048943646092e-06, + "loss": 0.0815, + "step": 785 + }, + { + "epoch": 2.4159021406727827, + "grad_norm": 0.9084969162940979, + "learning_rate": 3.0396174077947416e-06, + "loss": 0.1037, + "step": 790 + }, + { + "epoch": 2.4311926605504586, + "grad_norm": 0.9129403829574585, + "learning_rate": 2.8885905430234504e-06, + "loss": 0.0967, + "step": 795 + }, + { + "epoch": 2.4464831804281344, + "grad_norm": 0.8506408333778381, + "learning_rate": 2.74101134100033e-06, + "loss": 0.0969, + "step": 800 + }, + { + "epoch": 2.46177370030581, + "grad_norm": 0.8811430335044861, + "learning_rate": 2.5969218119735705e-06, + "loss": 0.091, + "step": 805 + }, + { + "epoch": 2.477064220183486, + "grad_norm": 0.9298674464225769, + "learning_rate": 2.456362972812714e-06, + "loss": 0.1172, + "step": 810 + }, + { + "epoch": 2.4923547400611623, + "grad_norm": 0.8561994433403015, + "learning_rate": 2.3193748353326786e-06, + "loss": 0.0823, + "step": 815 + }, + { + "epoch": 2.5076452599388377, + "grad_norm": 0.8696724772453308, + "learning_rate": 2.1859963949039258e-06, + "loss": 0.1088, + "step": 820 + }, + { + "epoch": 2.522935779816514, + "grad_norm": 0.8831801414489746, + "learning_rate": 2.0562656193519287e-06, + "loss": 0.0819, + "step": 825 + }, + { + "epoch": 2.5382262996941893, + "grad_norm": 0.7637335062026978, + "learning_rate": 1.930219438149159e-06, + "loss": 0.0869, + "step": 830 + }, + { + "epoch": 2.5535168195718656, + "grad_norm": 1.0260728597640991, + "learning_rate": 1.8078937319026655e-06, + "loss": 0.0958, + "step": 835 + }, + { + "epoch": 2.5688073394495414, + "grad_norm": 0.7075165510177612, + "learning_rate": 1.6893233221401966e-06, + "loss": 0.0917, + "step": 840 + }, + { + "epoch": 2.5840978593272173, + "grad_norm": 0.827116847038269, + "learning_rate": 1.5745419613978463e-06, + "loss": 0.0947, + "step": 845 + }, + { + "epoch": 2.599388379204893, + "grad_norm": 0.7628703117370605, + "learning_rate": 1.4635823236119544e-06, + "loss": 0.0911, + "step": 850 + }, + { + "epoch": 2.614678899082569, + "grad_norm": 0.9345046281814575, + "learning_rate": 1.3564759948180816e-06, + "loss": 0.0894, + "step": 855 + }, + { + "epoch": 2.6299694189602447, + "grad_norm": 0.8761036992073059, + "learning_rate": 1.2532534641596345e-06, + "loss": 0.0915, + "step": 860 + }, + { + "epoch": 2.6452599388379205, + "grad_norm": 0.7703090310096741, + "learning_rate": 1.1539441152087783e-06, + "loss": 0.0781, + "step": 865 + }, + { + "epoch": 2.6605504587155964, + "grad_norm": 0.8350563645362854, + "learning_rate": 1.0585762176020148e-06, + "loss": 0.0945, + "step": 870 + }, + { + "epoch": 2.675840978593272, + "grad_norm": 0.6997930407524109, + "learning_rate": 9.67176918992894e-07, + "loss": 0.0779, + "step": 875 + }, + { + "epoch": 2.691131498470948, + "grad_norm": 0.7351768016815186, + "learning_rate": 8.797722373240985e-07, + "loss": 0.0916, + "step": 880 + }, + { + "epoch": 2.706422018348624, + "grad_norm": 0.8368445634841919, + "learning_rate": 7.963870534211093e-07, + "loss": 0.0909, + "step": 885 + }, + { + "epoch": 2.7217125382262997, + "grad_norm": 0.622424840927124, + "learning_rate": 7.170451039095649e-07, + "loss": 0.0823, + "step": 890 + }, + { + "epoch": 2.7370030581039755, + "grad_norm": 0.6975269913673401, + "learning_rate": 6.417689744583504e-07, + "loss": 0.0778, + "step": 895 + }, + { + "epoch": 2.7522935779816513, + "grad_norm": 0.6831955909729004, + "learning_rate": 5.705800933502974e-07, + "loss": 0.0815, + "step": 900 + }, + { + "epoch": 2.767584097859327, + "grad_norm": 0.6269873976707458, + "learning_rate": 5.034987253823614e-07, + "loss": 0.0717, + "step": 905 + }, + { + "epoch": 2.782874617737003, + "grad_norm": 0.7579420208930969, + "learning_rate": 4.405439660969929e-07, + "loss": 0.0785, + "step": 910 + }, + { + "epoch": 2.7981651376146788, + "grad_norm": 0.7299777865409851, + "learning_rate": 3.8173373634635745e-07, + "loss": 0.0926, + "step": 915 + }, + { + "epoch": 2.8134556574923546, + "grad_norm": 0.8547154664993286, + "learning_rate": 3.270847771909463e-07, + "loss": 0.0973, + "step": 920 + }, + { + "epoch": 2.8287461773700304, + "grad_norm": 0.7282462120056152, + "learning_rate": 2.766126451340184e-07, + "loss": 0.0907, + "step": 925 + }, + { + "epoch": 2.8440366972477067, + "grad_norm": 0.8509518504142761, + "learning_rate": 2.3033170769325763e-07, + "loss": 0.0796, + "step": 930 + }, + { + "epoch": 2.859327217125382, + "grad_norm": 0.9212472438812256, + "learning_rate": 1.8825513931086947e-07, + "loss": 0.0926, + "step": 935 + }, + { + "epoch": 2.8746177370030583, + "grad_norm": 0.629286527633667, + "learning_rate": 1.503949176033259e-07, + "loss": 0.0753, + "step": 940 + }, + { + "epoch": 2.8899082568807337, + "grad_norm": 0.8056069612503052, + "learning_rate": 1.1676181995177781e-07, + "loss": 0.0843, + "step": 945 + }, + { + "epoch": 2.90519877675841, + "grad_norm": 0.9281610250473022, + "learning_rate": 8.73654204341473e-08, + "loss": 0.0962, + "step": 950 + }, + { + "epoch": 2.9204892966360854, + "grad_norm": 0.8024609088897705, + "learning_rate": 6.221408709974619e-08, + "loss": 0.1, + "step": 955 + }, + { + "epoch": 2.9357798165137616, + "grad_norm": 0.5202727317810059, + "learning_rate": 4.131497958720143e-08, + "loss": 0.0721, + "step": 960 + }, + { + "epoch": 2.9510703363914375, + "grad_norm": 0.6651294231414795, + "learning_rate": 2.4674047086383767e-08, + "loss": 0.0756, + "step": 965 + }, + { + "epoch": 2.9663608562691133, + "grad_norm": 0.8018882870674133, + "learning_rate": 1.2296026644890713e-08, + "loss": 0.0847, + "step": 970 + }, + { + "epoch": 2.981651376146789, + "grad_norm": 0.7078405022621155, + "learning_rate": 4.184441819588547e-09, + "loss": 0.0825, + "step": 975 + }, + { + "epoch": 2.996941896024465, + "grad_norm": 0.7568854093551636, + "learning_rate": 3.416016735929839e-10, + "loss": 0.0638, + "step": 980 + }, + { + "epoch": 3.0, + "step": 981, + "total_flos": 1.225514319484027e+18, + "train_loss": 0.422133161341743, + "train_runtime": 987.0743, + "train_samples_per_second": 31.8, + "train_steps_per_second": 0.994 + } + ], + "logging_steps": 5, + "max_steps": 981, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.225514319484027e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..62f2f54ccf7d076cb1293f2fec195abf360a17a7 --- /dev/null +++ b/hotpotqa_test_knowledge_50_base/5_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:186405e0820006254c8933012e17280c0418f0584b785c773bbb63f6c46cb234 +size 8273 diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e760b3a4cb55ac6b93a1be33e9fd54af1b368a3d --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2055853c6c8c365bf1308f5b9d016eb59ebb821c --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "down_proj", + "gate_proj", + "o_proj", + "k_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..887442eb4b76f05a1c48c3cd8a3c711b08717e34 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8276c73886f3f6d6e60af6aec582ef10b033fbb050c5398791a0453dc369a7c +size 671150064 diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d972a6cb972b70ae4a2fd12b40c8e2d45479410c --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8970279091854377e+18, + "train_loss": 0.46760529968445036, + "train_runtime": 1488.4997, + "train_samples": 14795, + "train_samples_per_second": 29.819, + "train_steps_per_second": 0.933 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/chat_template.jinja b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d972a6cb972b70ae4a2fd12b40c8e2d45479410c --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8970279091854377e+18, + "train_loss": 0.46760529968445036, + "train_runtime": 1488.4997, + "train_samples": 14795, + "train_samples_per_second": 29.819, + "train_steps_per_second": 0.933 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1d650e04909739046b48241783d3e2cfe6c3a020 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1982 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1389, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010810810810810811, + "grad_norm": 0.6381889581680298, + "learning_rate": 1.7142857142857143e-06, + "loss": 1.6745, + "step": 5 + }, + { + "epoch": 0.021621621621621623, + "grad_norm": 0.6009920239448547, + "learning_rate": 3.857142857142857e-06, + "loss": 1.7225, + "step": 10 + }, + { + "epoch": 0.032432432432432434, + "grad_norm": 0.5131360292434692, + "learning_rate": 6e-06, + "loss": 1.5997, + "step": 15 + }, + { + "epoch": 0.043243243243243246, + "grad_norm": 0.48929300904273987, + "learning_rate": 8.142857142857142e-06, + "loss": 1.684, + "step": 20 + }, + { + "epoch": 0.05405405405405406, + "grad_norm": 0.4530405104160309, + "learning_rate": 1.0285714285714286e-05, + "loss": 1.6449, + "step": 25 + }, + { + "epoch": 0.06486486486486487, + "grad_norm": 0.4812529981136322, + "learning_rate": 1.242857142857143e-05, + "loss": 1.6146, + "step": 30 + }, + { + "epoch": 0.07567567567567568, + "grad_norm": 0.4733086824417114, + "learning_rate": 1.4571428571428571e-05, + "loss": 1.5906, + "step": 35 + }, + { + "epoch": 0.08648648648648649, + "grad_norm": 0.45789840817451477, + "learning_rate": 1.6714285714285716e-05, + "loss": 1.6027, + "step": 40 + }, + { + "epoch": 0.0972972972972973, + "grad_norm": 0.4302331209182739, + "learning_rate": 1.8857142857142856e-05, + "loss": 1.5742, + "step": 45 + }, + { + "epoch": 0.10810810810810811, + "grad_norm": 0.45499739050865173, + "learning_rate": 2.1e-05, + "loss": 1.6062, + "step": 50 + }, + { + "epoch": 0.11891891891891893, + "grad_norm": 0.48351895809173584, + "learning_rate": 2.3142857142857145e-05, + "loss": 1.5027, + "step": 55 + }, + { + "epoch": 0.12972972972972974, + "grad_norm": 0.5247782468795776, + "learning_rate": 2.5285714285714285e-05, + "loss": 1.474, + "step": 60 + }, + { + "epoch": 0.14054054054054055, + "grad_norm": 0.49603092670440674, + "learning_rate": 2.7428571428571428e-05, + "loss": 1.5197, + "step": 65 + }, + { + "epoch": 0.15135135135135136, + "grad_norm": 0.5653012990951538, + "learning_rate": 2.9571428571428575e-05, + "loss": 1.466, + "step": 70 + }, + { + "epoch": 0.16216216216216217, + "grad_norm": 0.5598549246788025, + "learning_rate": 2.999931924927058e-05, + "loss": 1.4261, + "step": 75 + }, + { + "epoch": 0.17297297297297298, + "grad_norm": 0.5777876973152161, + "learning_rate": 2.999655380533103e-05, + "loss": 1.5216, + "step": 80 + }, + { + "epoch": 0.1837837837837838, + "grad_norm": 0.5975826978683472, + "learning_rate": 2.999166151316113e-05, + "loss": 1.5106, + "step": 85 + }, + { + "epoch": 0.1945945945945946, + "grad_norm": 0.6301483511924744, + "learning_rate": 2.9984643066597815e-05, + "loss": 1.4119, + "step": 90 + }, + { + "epoch": 0.20540540540540542, + "grad_norm": 0.6500494480133057, + "learning_rate": 2.997549946101448e-05, + "loss": 1.3063, + "step": 95 + }, + { + "epoch": 0.21621621621621623, + "grad_norm": 0.5895209312438965, + "learning_rate": 2.9964231993179804e-05, + "loss": 1.3415, + "step": 100 + }, + { + "epoch": 0.22702702702702704, + "grad_norm": 0.6512500047683716, + "learning_rate": 2.9950842261073867e-05, + "loss": 1.3386, + "step": 105 + }, + { + "epoch": 0.23783783783783785, + "grad_norm": 0.6594590544700623, + "learning_rate": 2.9935332163661465e-05, + "loss": 1.3102, + "step": 110 + }, + { + "epoch": 0.24864864864864866, + "grad_norm": 0.6288082599639893, + "learning_rate": 2.9917703900622867e-05, + "loss": 1.349, + "step": 115 + }, + { + "epoch": 0.2594594594594595, + "grad_norm": 0.6951631903648376, + "learning_rate": 2.9897959972041778e-05, + "loss": 1.3742, + "step": 120 + }, + { + "epoch": 0.2702702702702703, + "grad_norm": 0.6691563725471497, + "learning_rate": 2.9876103178050833e-05, + "loss": 1.2756, + "step": 125 + }, + { + "epoch": 0.2810810810810811, + "grad_norm": 0.6942166686058044, + "learning_rate": 2.9852136618434435e-05, + "loss": 1.2062, + "step": 130 + }, + { + "epoch": 0.2918918918918919, + "grad_norm": 0.8009172081947327, + "learning_rate": 2.9826063692189152e-05, + "loss": 1.2517, + "step": 135 + }, + { + "epoch": 0.3027027027027027, + "grad_norm": 0.7842971682548523, + "learning_rate": 2.9797888097041656e-05, + "loss": 1.2125, + "step": 140 + }, + { + "epoch": 0.31351351351351353, + "grad_norm": 0.7655503749847412, + "learning_rate": 2.9767613828924316e-05, + "loss": 1.1244, + "step": 145 + }, + { + "epoch": 0.32432432432432434, + "grad_norm": 0.7304131984710693, + "learning_rate": 2.9735245181408466e-05, + "loss": 1.1476, + "step": 150 + }, + { + "epoch": 0.33513513513513515, + "grad_norm": 0.7515624165534973, + "learning_rate": 2.9700786745095483e-05, + "loss": 1.1261, + "step": 155 + }, + { + "epoch": 0.34594594594594597, + "grad_norm": 0.9079862236976624, + "learning_rate": 2.966424340696577e-05, + "loss": 1.0715, + "step": 160 + }, + { + "epoch": 0.3567567567567568, + "grad_norm": 0.8592352271080017, + "learning_rate": 2.9625620349685617e-05, + "loss": 1.136, + "step": 165 + }, + { + "epoch": 0.3675675675675676, + "grad_norm": 0.8973296880722046, + "learning_rate": 2.9584923050872218e-05, + "loss": 1.1617, + "step": 170 + }, + { + "epoch": 0.3783783783783784, + "grad_norm": 0.9361931681632996, + "learning_rate": 2.9542157282316822e-05, + "loss": 1.0731, + "step": 175 + }, + { + "epoch": 0.3891891891891892, + "grad_norm": 0.9525664448738098, + "learning_rate": 2.9497329109166154e-05, + "loss": 1.0615, + "step": 180 + }, + { + "epoch": 0.4, + "grad_norm": 0.9693201780319214, + "learning_rate": 2.9450444889062243e-05, + "loss": 1.136, + "step": 185 + }, + { + "epoch": 0.41081081081081083, + "grad_norm": 1.0949007272720337, + "learning_rate": 2.940151127124076e-05, + "loss": 1.0307, + "step": 190 + }, + { + "epoch": 0.42162162162162165, + "grad_norm": 0.8979316353797913, + "learning_rate": 2.935053519558802e-05, + "loss": 1.0206, + "step": 195 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 0.9468956589698792, + "learning_rate": 2.929752389165673e-05, + "loss": 1.0685, + "step": 200 + }, + { + "epoch": 0.44324324324324327, + "grad_norm": 1.0763999223709106, + "learning_rate": 2.9242484877640706e-05, + "loss": 0.9719, + "step": 205 + }, + { + "epoch": 0.4540540540540541, + "grad_norm": 0.9384706616401672, + "learning_rate": 2.9185425959308596e-05, + "loss": 0.9684, + "step": 210 + }, + { + "epoch": 0.4648648648648649, + "grad_norm": 1.0114260911941528, + "learning_rate": 2.912635522889686e-05, + "loss": 1.0147, + "step": 215 + }, + { + "epoch": 0.4756756756756757, + "grad_norm": 1.3161630630493164, + "learning_rate": 2.9065281063962106e-05, + "loss": 0.9433, + "step": 220 + }, + { + "epoch": 0.4864864864864865, + "grad_norm": 1.0697964429855347, + "learning_rate": 2.900221212619297e-05, + "loss": 0.9363, + "step": 225 + }, + { + "epoch": 0.4972972972972973, + "grad_norm": 0.9858373403549194, + "learning_rate": 2.893715736018168e-05, + "loss": 1.0145, + "step": 230 + }, + { + "epoch": 0.5081081081081081, + "grad_norm": 0.9789324998855591, + "learning_rate": 2.8870125992155527e-05, + "loss": 0.9474, + "step": 235 + }, + { + "epoch": 0.518918918918919, + "grad_norm": 1.1349128484725952, + "learning_rate": 2.8801127528668372e-05, + "loss": 0.9261, + "step": 240 + }, + { + "epoch": 0.5297297297297298, + "grad_norm": 1.0158307552337646, + "learning_rate": 2.87301717552524e-05, + "loss": 0.8934, + "step": 245 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 1.0250061750411987, + "learning_rate": 2.8657268735030316e-05, + "loss": 0.8713, + "step": 250 + }, + { + "epoch": 0.5513513513513514, + "grad_norm": 1.0478782653808594, + "learning_rate": 2.8582428807288182e-05, + "loss": 0.889, + "step": 255 + }, + { + "epoch": 0.5621621621621622, + "grad_norm": 1.18085515499115, + "learning_rate": 2.850566258600903e-05, + "loss": 0.8841, + "step": 260 + }, + { + "epoch": 0.572972972972973, + "grad_norm": 1.1079061031341553, + "learning_rate": 2.8426980958367618e-05, + "loss": 0.8336, + "step": 265 + }, + { + "epoch": 0.5837837837837838, + "grad_norm": 1.2246654033660889, + "learning_rate": 2.8346395083186336e-05, + "loss": 0.8433, + "step": 270 + }, + { + "epoch": 0.5945945945945946, + "grad_norm": 1.1317455768585205, + "learning_rate": 2.8263916389352673e-05, + "loss": 0.794, + "step": 275 + }, + { + "epoch": 0.6054054054054054, + "grad_norm": 1.1388928890228271, + "learning_rate": 2.8179556574198312e-05, + "loss": 0.8178, + "step": 280 + }, + { + "epoch": 0.6162162162162163, + "grad_norm": 1.2461508512496948, + "learning_rate": 2.80933276018402e-05, + "loss": 0.8414, + "step": 285 + }, + { + "epoch": 0.6270270270270271, + "grad_norm": 1.0858873128890991, + "learning_rate": 2.800524170148377e-05, + "loss": 0.7617, + "step": 290 + }, + { + "epoch": 0.6378378378378379, + "grad_norm": 1.1447216272354126, + "learning_rate": 2.7915311365688562e-05, + "loss": 0.7615, + "step": 295 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 1.216931700706482, + "learning_rate": 2.78235493485965e-05, + "loss": 0.8004, + "step": 300 + }, + { + "epoch": 0.6594594594594595, + "grad_norm": 1.2283238172531128, + "learning_rate": 2.7729968664123056e-05, + "loss": 0.7636, + "step": 305 + }, + { + "epoch": 0.6702702702702703, + "grad_norm": 1.0871161222457886, + "learning_rate": 2.7634582584111605e-05, + "loss": 0.7529, + "step": 310 + }, + { + "epoch": 0.6810810810810811, + "grad_norm": 1.1306588649749756, + "learning_rate": 2.753740463645115e-05, + "loss": 0.7031, + "step": 315 + }, + { + "epoch": 0.6918918918918919, + "grad_norm": 1.1695075035095215, + "learning_rate": 2.7438448603157802e-05, + "loss": 0.7104, + "step": 320 + }, + { + "epoch": 0.7027027027027027, + "grad_norm": 1.1853001117706299, + "learning_rate": 2.7337728518420143e-05, + "loss": 0.7483, + "step": 325 + }, + { + "epoch": 0.7135135135135136, + "grad_norm": 1.4037182331085205, + "learning_rate": 2.723525866660889e-05, + "loss": 0.7296, + "step": 330 + }, + { + "epoch": 0.7243243243243244, + "grad_norm": 1.1773074865341187, + "learning_rate": 2.713105358025104e-05, + "loss": 0.7096, + "step": 335 + }, + { + "epoch": 0.7351351351351352, + "grad_norm": 1.199796438217163, + "learning_rate": 2.7025128037968824e-05, + "loss": 0.6628, + "step": 340 + }, + { + "epoch": 0.745945945945946, + "grad_norm": 1.2040430307388306, + "learning_rate": 2.6917497062383776e-05, + "loss": 0.6976, + "step": 345 + }, + { + "epoch": 0.7567567567567568, + "grad_norm": 1.1698822975158691, + "learning_rate": 2.680817591798619e-05, + "loss": 0.6812, + "step": 350 + }, + { + "epoch": 0.7675675675675676, + "grad_norm": 1.326837420463562, + "learning_rate": 2.669718010897026e-05, + "loss": 0.6589, + "step": 355 + }, + { + "epoch": 0.7783783783783784, + "grad_norm": 1.2902953624725342, + "learning_rate": 2.6584525377035257e-05, + "loss": 0.6288, + "step": 360 + }, + { + "epoch": 0.7891891891891892, + "grad_norm": 1.2083886861801147, + "learning_rate": 2.6470227699152975e-05, + "loss": 0.6588, + "step": 365 + }, + { + "epoch": 0.8, + "grad_norm": 1.1795549392700195, + "learning_rate": 2.635430328530186e-05, + "loss": 0.6864, + "step": 370 + }, + { + "epoch": 0.8108108108108109, + "grad_norm": 1.1731563806533813, + "learning_rate": 2.6236768576168065e-05, + "loss": 0.6421, + "step": 375 + }, + { + "epoch": 0.8216216216216217, + "grad_norm": 1.435877799987793, + "learning_rate": 2.611764024081378e-05, + "loss": 0.6677, + "step": 380 + }, + { + "epoch": 0.8324324324324325, + "grad_norm": 1.2355320453643799, + "learning_rate": 2.5996935174313215e-05, + "loss": 0.5895, + "step": 385 + }, + { + "epoch": 0.8432432432432433, + "grad_norm": 1.1698821783065796, + "learning_rate": 2.587467049535645e-05, + "loss": 0.5761, + "step": 390 + }, + { + "epoch": 0.8540540540540541, + "grad_norm": 1.1961256265640259, + "learning_rate": 2.575086354382167e-05, + "loss": 0.5892, + "step": 395 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 1.5308952331542969, + "learning_rate": 2.5625531878315956e-05, + "loss": 0.6213, + "step": 400 + }, + { + "epoch": 0.8756756756756757, + "grad_norm": 1.2835497856140137, + "learning_rate": 2.5498693273685074e-05, + "loss": 0.5912, + "step": 405 + }, + { + "epoch": 0.8864864864864865, + "grad_norm": 1.2004526853561401, + "learning_rate": 2.5370365718492615e-05, + "loss": 0.5604, + "step": 410 + }, + { + "epoch": 0.8972972972972973, + "grad_norm": 1.1274139881134033, + "learning_rate": 2.5240567412468816e-05, + "loss": 0.54, + "step": 415 + }, + { + "epoch": 0.9081081081081082, + "grad_norm": 1.2470859289169312, + "learning_rate": 2.5109316763929414e-05, + "loss": 0.6146, + "step": 420 + }, + { + "epoch": 0.918918918918919, + "grad_norm": 1.1818578243255615, + "learning_rate": 2.497663238716495e-05, + "loss": 0.588, + "step": 425 + }, + { + "epoch": 0.9297297297297298, + "grad_norm": 1.642536997795105, + "learning_rate": 2.4842533099800822e-05, + "loss": 0.5463, + "step": 430 + }, + { + "epoch": 0.9405405405405406, + "grad_norm": 1.258682131767273, + "learning_rate": 2.4707037920128554e-05, + "loss": 0.5784, + "step": 435 + }, + { + "epoch": 0.9513513513513514, + "grad_norm": 1.0887631177902222, + "learning_rate": 2.4570166064408556e-05, + "loss": 0.5989, + "step": 440 + }, + { + "epoch": 0.9621621621621622, + "grad_norm": 1.3243699073791504, + "learning_rate": 2.4431936944144818e-05, + "loss": 0.4844, + "step": 445 + }, + { + "epoch": 0.972972972972973, + "grad_norm": 1.267890453338623, + "learning_rate": 2.4292370163331936e-05, + "loss": 0.5271, + "step": 450 + }, + { + "epoch": 0.9837837837837838, + "grad_norm": 1.246821403503418, + "learning_rate": 2.4151485515674837e-05, + "loss": 0.5798, + "step": 455 + }, + { + "epoch": 0.9945945945945946, + "grad_norm": 1.3142491579055786, + "learning_rate": 2.400930298178155e-05, + "loss": 0.5332, + "step": 460 + }, + { + "epoch": 1.0043243243243243, + "grad_norm": 1.2088594436645508, + "learning_rate": 2.3865842726329556e-05, + "loss": 0.5415, + "step": 465 + }, + { + "epoch": 1.0151351351351352, + "grad_norm": 1.1440794467926025, + "learning_rate": 2.3721125095205935e-05, + "loss": 0.4455, + "step": 470 + }, + { + "epoch": 1.025945945945946, + "grad_norm": 1.2541085481643677, + "learning_rate": 2.357517061262189e-05, + "loss": 0.4103, + "step": 475 + }, + { + "epoch": 1.0367567567567568, + "grad_norm": 1.5026423931121826, + "learning_rate": 2.3427999978201956e-05, + "loss": 0.5171, + "step": 480 + }, + { + "epoch": 1.0475675675675675, + "grad_norm": 1.0757910013198853, + "learning_rate": 2.3279634064048308e-05, + "loss": 0.4031, + "step": 485 + }, + { + "epoch": 1.0583783783783784, + "grad_norm": 1.221948504447937, + "learning_rate": 2.3130093911780642e-05, + "loss": 0.4299, + "step": 490 + }, + { + "epoch": 1.0691891891891891, + "grad_norm": 1.1497833728790283, + "learning_rate": 2.2979400729552014e-05, + "loss": 0.4415, + "step": 495 + }, + { + "epoch": 1.08, + "grad_norm": 1.2940278053283691, + "learning_rate": 2.2827575889041007e-05, + "loss": 0.4319, + "step": 500 + }, + { + "epoch": 1.0908108108108108, + "grad_norm": 1.2771921157836914, + "learning_rate": 2.267464092242078e-05, + "loss": 0.4134, + "step": 505 + }, + { + "epoch": 1.1016216216216217, + "grad_norm": 1.2925984859466553, + "learning_rate": 2.2520617519305325e-05, + "loss": 0.4106, + "step": 510 + }, + { + "epoch": 1.1124324324324324, + "grad_norm": 1.3321442604064941, + "learning_rate": 2.2365527523673368e-05, + "loss": 0.4438, + "step": 515 + }, + { + "epoch": 1.1232432432432433, + "grad_norm": 1.44887375831604, + "learning_rate": 2.2209392930770424e-05, + "loss": 0.4006, + "step": 520 + }, + { + "epoch": 1.134054054054054, + "grad_norm": 1.209348201751709, + "learning_rate": 2.2052235883989355e-05, + "loss": 0.4036, + "step": 525 + }, + { + "epoch": 1.144864864864865, + "grad_norm": 1.2285774946212769, + "learning_rate": 2.189407867172997e-05, + "loss": 0.4005, + "step": 530 + }, + { + "epoch": 1.1556756756756756, + "grad_norm": 1.182428002357483, + "learning_rate": 2.1734943724237986e-05, + "loss": 0.4154, + "step": 535 + }, + { + "epoch": 1.1664864864864866, + "grad_norm": 1.4728293418884277, + "learning_rate": 2.157485361042398e-05, + "loss": 0.3838, + "step": 540 + }, + { + "epoch": 1.1772972972972973, + "grad_norm": 1.1531691551208496, + "learning_rate": 2.1413831034662536e-05, + "loss": 0.3908, + "step": 545 + }, + { + "epoch": 1.1881081081081082, + "grad_norm": 1.3294119834899902, + "learning_rate": 2.1251898833572303e-05, + "loss": 0.3747, + "step": 550 + }, + { + "epoch": 1.1989189189189189, + "grad_norm": 1.2550768852233887, + "learning_rate": 2.1089079972777238e-05, + "loss": 0.3583, + "step": 555 + }, + { + "epoch": 1.2097297297297298, + "grad_norm": 1.2272930145263672, + "learning_rate": 2.092539754364957e-05, + "loss": 0.3502, + "step": 560 + }, + { + "epoch": 1.2205405405405405, + "grad_norm": 1.2808771133422852, + "learning_rate": 2.07608747600349e-05, + "loss": 0.3449, + "step": 565 + }, + { + "epoch": 1.2313513513513514, + "grad_norm": 1.3437514305114746, + "learning_rate": 2.0595534954959984e-05, + "loss": 0.3957, + "step": 570 + }, + { + "epoch": 1.2421621621621621, + "grad_norm": 1.3660558462142944, + "learning_rate": 2.0429401577323576e-05, + "loss": 0.3762, + "step": 575 + }, + { + "epoch": 1.252972972972973, + "grad_norm": 1.4005869626998901, + "learning_rate": 2.0262498188570855e-05, + "loss": 0.3387, + "step": 580 + }, + { + "epoch": 1.2637837837837838, + "grad_norm": 1.4124189615249634, + "learning_rate": 2.0094848459351853e-05, + "loss": 0.3405, + "step": 585 + }, + { + "epoch": 1.2745945945945947, + "grad_norm": 1.398332118988037, + "learning_rate": 1.992647616616447e-05, + "loss": 0.3925, + "step": 590 + }, + { + "epoch": 1.2854054054054054, + "grad_norm": 1.252535104751587, + "learning_rate": 1.9757405187982397e-05, + "loss": 0.3497, + "step": 595 + }, + { + "epoch": 1.2962162162162163, + "grad_norm": 1.5389124155044556, + "learning_rate": 1.9587659502868546e-05, + "loss": 0.3865, + "step": 600 + }, + { + "epoch": 1.307027027027027, + "grad_norm": 1.5042232275009155, + "learning_rate": 1.9417263184574453e-05, + "loss": 0.3665, + "step": 605 + }, + { + "epoch": 1.3178378378378377, + "grad_norm": 1.3920376300811768, + "learning_rate": 1.9246240399126036e-05, + "loss": 0.3398, + "step": 610 + }, + { + "epoch": 1.3286486486486486, + "grad_norm": 1.2770435810089111, + "learning_rate": 1.907461540139633e-05, + "loss": 0.3469, + "step": 615 + }, + { + "epoch": 1.3394594594594595, + "grad_norm": 1.3710784912109375, + "learning_rate": 1.8902412531665613e-05, + "loss": 0.2978, + "step": 620 + }, + { + "epoch": 1.3502702702702702, + "grad_norm": 1.1827592849731445, + "learning_rate": 1.872965621216938e-05, + "loss": 0.2802, + "step": 625 + }, + { + "epoch": 1.361081081081081, + "grad_norm": 1.2210781574249268, + "learning_rate": 1.855637094363474e-05, + "loss": 0.3124, + "step": 630 + }, + { + "epoch": 1.3718918918918919, + "grad_norm": 1.3117499351501465, + "learning_rate": 1.8382581301805676e-05, + "loss": 0.3475, + "step": 635 + }, + { + "epoch": 1.3827027027027028, + "grad_norm": 1.4101191759109497, + "learning_rate": 1.8208311933957606e-05, + "loss": 0.3186, + "step": 640 + }, + { + "epoch": 1.3935135135135135, + "grad_norm": 1.2919602394104004, + "learning_rate": 1.80335875554019e-05, + "loss": 0.2854, + "step": 645 + }, + { + "epoch": 1.4043243243243242, + "grad_norm": 1.2345503568649292, + "learning_rate": 1.7858432945980645e-05, + "loss": 0.3135, + "step": 650 + }, + { + "epoch": 1.4151351351351351, + "grad_norm": 1.4037058353424072, + "learning_rate": 1.7682872946552352e-05, + "loss": 0.3265, + "step": 655 + }, + { + "epoch": 1.425945945945946, + "grad_norm": 1.3754276037216187, + "learning_rate": 1.750693245546893e-05, + "loss": 0.3195, + "step": 660 + }, + { + "epoch": 1.4367567567567567, + "grad_norm": 1.291630506515503, + "learning_rate": 1.7330636425044553e-05, + "loss": 0.282, + "step": 665 + }, + { + "epoch": 1.4475675675675674, + "grad_norm": 1.3135409355163574, + "learning_rate": 1.7154009858016867e-05, + "loss": 0.295, + "step": 670 + }, + { + "epoch": 1.4583783783783784, + "grad_norm": 1.3253206014633179, + "learning_rate": 1.697707780400102e-05, + "loss": 0.2713, + "step": 675 + }, + { + "epoch": 1.4691891891891893, + "grad_norm": 1.3621236085891724, + "learning_rate": 1.67998653559371e-05, + "loss": 0.302, + "step": 680 + }, + { + "epoch": 1.48, + "grad_norm": 1.5253937244415283, + "learning_rate": 1.6622397646531352e-05, + "loss": 0.2768, + "step": 685 + }, + { + "epoch": 1.4908108108108107, + "grad_norm": 1.140869140625, + "learning_rate": 1.644469984469182e-05, + "loss": 0.2926, + "step": 690 + }, + { + "epoch": 1.5016216216216216, + "grad_norm": 1.4579461812973022, + "learning_rate": 1.6266797151958815e-05, + "loss": 0.3455, + "step": 695 + }, + { + "epoch": 1.5124324324324325, + "grad_norm": 1.2998229265213013, + "learning_rate": 1.6088714798930806e-05, + "loss": 0.3099, + "step": 700 + }, + { + "epoch": 1.5232432432432432, + "grad_norm": 1.1221389770507812, + "learning_rate": 1.59104780416861e-05, + "loss": 0.2811, + "step": 705 + }, + { + "epoch": 1.534054054054054, + "grad_norm": 1.4283703565597534, + "learning_rate": 1.5732112158200995e-05, + "loss": 0.2894, + "step": 710 + }, + { + "epoch": 1.5448648648648649, + "grad_norm": 1.2363924980163574, + "learning_rate": 1.5553642444764808e-05, + "loss": 0.2868, + "step": 715 + }, + { + "epoch": 1.5556756756756758, + "grad_norm": 1.303499460220337, + "learning_rate": 1.5375094212392253e-05, + "loss": 0.276, + "step": 720 + }, + { + "epoch": 1.5664864864864865, + "grad_norm": 1.171356439590454, + "learning_rate": 1.5196492783233815e-05, + "loss": 0.2911, + "step": 725 + }, + { + "epoch": 1.5772972972972972, + "grad_norm": 1.2036036252975464, + "learning_rate": 1.501786348698446e-05, + "loss": 0.2734, + "step": 730 + }, + { + "epoch": 1.588108108108108, + "grad_norm": 1.2619316577911377, + "learning_rate": 1.4839231657291343e-05, + "loss": 0.2465, + "step": 735 + }, + { + "epoch": 1.598918918918919, + "grad_norm": 1.2972297668457031, + "learning_rate": 1.4660622628160921e-05, + "loss": 0.2816, + "step": 740 + }, + { + "epoch": 1.6097297297297297, + "grad_norm": 1.1339361667633057, + "learning_rate": 1.4482061730366009e-05, + "loss": 0.2251, + "step": 745 + }, + { + "epoch": 1.6205405405405404, + "grad_norm": 1.3333203792572021, + "learning_rate": 1.4303574287853324e-05, + "loss": 0.2365, + "step": 750 + }, + { + "epoch": 1.6313513513513513, + "grad_norm": 1.2112374305725098, + "learning_rate": 1.4125185614151967e-05, + "loss": 0.2158, + "step": 755 + }, + { + "epoch": 1.6421621621621623, + "grad_norm": 1.2954938411712646, + "learning_rate": 1.3946921008783418e-05, + "loss": 0.257, + "step": 760 + }, + { + "epoch": 1.652972972972973, + "grad_norm": 1.3815137147903442, + "learning_rate": 1.3768805753673465e-05, + "loss": 0.2287, + "step": 765 + }, + { + "epoch": 1.6637837837837837, + "grad_norm": 1.1673003435134888, + "learning_rate": 1.359086510956668e-05, + "loss": 0.2685, + "step": 770 + }, + { + "epoch": 1.6745945945945946, + "grad_norm": 1.2027561664581299, + "learning_rate": 1.3413124312443874e-05, + "loss": 0.2413, + "step": 775 + }, + { + "epoch": 1.6854054054054055, + "grad_norm": 1.346320390701294, + "learning_rate": 1.3235608569943059e-05, + "loss": 0.2261, + "step": 780 + }, + { + "epoch": 1.6962162162162162, + "grad_norm": 1.2816659212112427, + "learning_rate": 1.3058343057784458e-05, + "loss": 0.2067, + "step": 785 + }, + { + "epoch": 1.707027027027027, + "grad_norm": 1.268969178199768, + "learning_rate": 1.2881352916199988e-05, + "loss": 0.2419, + "step": 790 + }, + { + "epoch": 1.7178378378378378, + "grad_norm": 1.2219328880310059, + "learning_rate": 1.270466324636786e-05, + "loss": 0.2344, + "step": 795 + }, + { + "epoch": 1.7286486486486488, + "grad_norm": 1.1999274492263794, + "learning_rate": 1.252829910685263e-05, + "loss": 0.2413, + "step": 800 + }, + { + "epoch": 1.7394594594594595, + "grad_norm": 1.3530428409576416, + "learning_rate": 1.2352285510051324e-05, + "loss": 0.2637, + "step": 805 + }, + { + "epoch": 1.7502702702702702, + "grad_norm": 1.1784980297088623, + "learning_rate": 1.2176647418646156e-05, + "loss": 0.2617, + "step": 810 + }, + { + "epoch": 1.761081081081081, + "grad_norm": 1.4476001262664795, + "learning_rate": 1.2001409742064245e-05, + "loss": 0.2002, + "step": 815 + }, + { + "epoch": 1.771891891891892, + "grad_norm": 1.2979135513305664, + "learning_rate": 1.1826597332944873e-05, + "loss": 0.2453, + "step": 820 + }, + { + "epoch": 1.7827027027027027, + "grad_norm": 1.204781413078308, + "learning_rate": 1.1652234983614848e-05, + "loss": 0.1902, + "step": 825 + }, + { + "epoch": 1.7935135135135134, + "grad_norm": 1.183113694190979, + "learning_rate": 1.1478347422572396e-05, + "loss": 0.2163, + "step": 830 + }, + { + "epoch": 1.8043243243243243, + "grad_norm": 1.2770209312438965, + "learning_rate": 1.1304959310980097e-05, + "loss": 0.2321, + "step": 835 + }, + { + "epoch": 1.8151351351351352, + "grad_norm": 1.1838551759719849, + "learning_rate": 1.113209523916737e-05, + "loss": 0.2189, + "step": 840 + }, + { + "epoch": 1.825945945945946, + "grad_norm": 1.2746931314468384, + "learning_rate": 1.0959779723143024e-05, + "loss": 0.1987, + "step": 845 + }, + { + "epoch": 1.8367567567567566, + "grad_norm": 1.5528274774551392, + "learning_rate": 1.0788037201118346e-05, + "loss": 0.1846, + "step": 850 + }, + { + "epoch": 1.8475675675675676, + "grad_norm": 1.227123498916626, + "learning_rate": 1.0616892030041184e-05, + "loss": 0.1959, + "step": 855 + }, + { + "epoch": 1.8583783783783785, + "grad_norm": 1.341099500656128, + "learning_rate": 1.0446368482141612e-05, + "loss": 0.1974, + "step": 860 + }, + { + "epoch": 1.8691891891891892, + "grad_norm": 1.2724146842956543, + "learning_rate": 1.027649074148956e-05, + "loss": 0.2075, + "step": 865 + }, + { + "epoch": 1.88, + "grad_norm": 1.1407707929611206, + "learning_rate": 1.0107282900565009e-05, + "loss": 0.1821, + "step": 870 + }, + { + "epoch": 1.8908108108108108, + "grad_norm": 1.3053250312805176, + "learning_rate": 9.938768956841077e-06, + "loss": 0.1915, + "step": 875 + }, + { + "epoch": 1.9016216216216217, + "grad_norm": 1.0981884002685547, + "learning_rate": 9.770972809380696e-06, + "loss": 0.1962, + "step": 880 + }, + { + "epoch": 1.9124324324324324, + "grad_norm": 1.19914972782135, + "learning_rate": 9.603918255447141e-06, + "loss": 0.1738, + "step": 885 + }, + { + "epoch": 1.9232432432432431, + "grad_norm": 1.0622508525848389, + "learning_rate": 9.437628987129084e-06, + "loss": 0.1793, + "step": 890 + }, + { + "epoch": 1.934054054054054, + "grad_norm": 1.3299715518951416, + "learning_rate": 9.272128587980498e-06, + "loss": 0.2166, + "step": 895 + }, + { + "epoch": 1.944864864864865, + "grad_norm": 1.455454707145691, + "learning_rate": 9.107440529675971e-06, + "loss": 0.1884, + "step": 900 + }, + { + "epoch": 1.9556756756756757, + "grad_norm": 1.0822657346725464, + "learning_rate": 8.943588168681937e-06, + "loss": 0.1868, + "step": 905 + }, + { + "epoch": 1.9664864864864864, + "grad_norm": 1.16983163356781, + "learning_rate": 8.780594742944159e-06, + "loss": 0.1608, + "step": 910 + }, + { + "epoch": 1.9772972972972973, + "grad_norm": 1.3019630908966064, + "learning_rate": 8.618483368592088e-06, + "loss": 0.1865, + "step": 915 + }, + { + "epoch": 1.9881081081081082, + "grad_norm": 1.1661171913146973, + "learning_rate": 8.457277036660463e-06, + "loss": 0.1855, + "step": 920 + }, + { + "epoch": 1.998918918918919, + "grad_norm": 1.3661718368530273, + "learning_rate": 8.296998609828704e-06, + "loss": 0.1566, + "step": 925 + }, + { + "epoch": 2.0086486486486486, + "grad_norm": 1.397322177886963, + "learning_rate": 8.13767081917841e-06, + "loss": 0.1572, + "step": 930 + }, + { + "epoch": 2.0194594594594593, + "grad_norm": 1.0491713285446167, + "learning_rate": 7.97931626096964e-06, + "loss": 0.136, + "step": 935 + }, + { + "epoch": 2.0302702702702704, + "grad_norm": 1.243077278137207, + "learning_rate": 7.821957393436202e-06, + "loss": 0.1344, + "step": 940 + }, + { + "epoch": 2.041081081081081, + "grad_norm": 1.0872808694839478, + "learning_rate": 7.665616533600628e-06, + "loss": 0.141, + "step": 945 + }, + { + "epoch": 2.051891891891892, + "grad_norm": 1.128914475440979, + "learning_rate": 7.5103158541090665e-06, + "loss": 0.1343, + "step": 950 + }, + { + "epoch": 2.0627027027027025, + "grad_norm": 0.9601897597312927, + "learning_rate": 7.356077380086726e-06, + "loss": 0.1149, + "step": 955 + }, + { + "epoch": 2.0735135135135137, + "grad_norm": 1.5162469148635864, + "learning_rate": 7.202922986014228e-06, + "loss": 0.1211, + "step": 960 + }, + { + "epoch": 2.0843243243243244, + "grad_norm": 1.0638269186019897, + "learning_rate": 7.050874392625302e-06, + "loss": 0.133, + "step": 965 + }, + { + "epoch": 2.095135135135135, + "grad_norm": 1.04863703250885, + "learning_rate": 6.899953163826292e-06, + "loss": 0.1258, + "step": 970 + }, + { + "epoch": 2.1059459459459458, + "grad_norm": 1.1880992650985718, + "learning_rate": 6.7501807036379125e-06, + "loss": 0.1336, + "step": 975 + }, + { + "epoch": 2.116756756756757, + "grad_norm": 1.1992796659469604, + "learning_rate": 6.601578253159698e-06, + "loss": 0.1451, + "step": 980 + }, + { + "epoch": 2.1275675675675676, + "grad_norm": 1.2545366287231445, + "learning_rate": 6.454166887557508e-06, + "loss": 0.12, + "step": 985 + }, + { + "epoch": 2.1383783783783783, + "grad_norm": 0.9762406349182129, + "learning_rate": 6.307967513074605e-06, + "loss": 0.1266, + "step": 990 + }, + { + "epoch": 2.149189189189189, + "grad_norm": 1.2748416662216187, + "learning_rate": 6.163000864066698e-06, + "loss": 0.1381, + "step": 995 + }, + { + "epoch": 2.16, + "grad_norm": 0.9198644757270813, + "learning_rate": 6.019287500061326e-06, + "loss": 0.1476, + "step": 1000 + }, + { + "epoch": 2.170810810810811, + "grad_norm": 0.932762086391449, + "learning_rate": 5.876847802842052e-06, + "loss": 0.1292, + "step": 1005 + }, + { + "epoch": 2.1816216216216215, + "grad_norm": 1.1341016292572021, + "learning_rate": 5.73570197355788e-06, + "loss": 0.1256, + "step": 1010 + }, + { + "epoch": 2.1924324324324322, + "grad_norm": 1.0145493745803833, + "learning_rate": 5.595870029858268e-06, + "loss": 0.1235, + "step": 1015 + }, + { + "epoch": 2.2032432432432434, + "grad_norm": 1.1165614128112793, + "learning_rate": 5.4573718030541965e-06, + "loss": 0.1212, + "step": 1020 + }, + { + "epoch": 2.214054054054054, + "grad_norm": 0.8774468898773193, + "learning_rate": 5.320226935305609e-06, + "loss": 0.1114, + "step": 1025 + }, + { + "epoch": 2.224864864864865, + "grad_norm": 1.0977457761764526, + "learning_rate": 5.184454876835746e-06, + "loss": 0.1199, + "step": 1030 + }, + { + "epoch": 2.2356756756756755, + "grad_norm": 1.032637357711792, + "learning_rate": 5.05007488317265e-06, + "loss": 0.1188, + "step": 1035 + }, + { + "epoch": 2.2464864864864866, + "grad_norm": 1.256244421005249, + "learning_rate": 4.917106012418294e-06, + "loss": 0.1258, + "step": 1040 + }, + { + "epoch": 2.2572972972972973, + "grad_norm": 0.8262854814529419, + "learning_rate": 4.7855671225457225e-06, + "loss": 0.1102, + "step": 1045 + }, + { + "epoch": 2.268108108108108, + "grad_norm": 0.9222052693367004, + "learning_rate": 4.655476868724566e-06, + "loss": 0.1155, + "step": 1050 + }, + { + "epoch": 2.2789189189189187, + "grad_norm": 0.9156045317649841, + "learning_rate": 4.526853700675325e-06, + "loss": 0.1212, + "step": 1055 + }, + { + "epoch": 2.28972972972973, + "grad_norm": 1.0266749858856201, + "learning_rate": 4.399715860052781e-06, + "loss": 0.1235, + "step": 1060 + }, + { + "epoch": 2.3005405405405406, + "grad_norm": 0.9352312088012695, + "learning_rate": 4.274081377858909e-06, + "loss": 0.0876, + "step": 1065 + }, + { + "epoch": 2.3113513513513513, + "grad_norm": 0.9430919289588928, + "learning_rate": 4.149968071885682e-06, + "loss": 0.1053, + "step": 1070 + }, + { + "epoch": 2.322162162162162, + "grad_norm": 1.0616426467895508, + "learning_rate": 4.027393544188129e-06, + "loss": 0.1142, + "step": 1075 + }, + { + "epoch": 2.332972972972973, + "grad_norm": 1.1666656732559204, + "learning_rate": 3.9063751785879365e-06, + "loss": 0.1037, + "step": 1080 + }, + { + "epoch": 2.343783783783784, + "grad_norm": 1.0223368406295776, + "learning_rate": 3.786930138208046e-06, + "loss": 0.1123, + "step": 1085 + }, + { + "epoch": 2.3545945945945945, + "grad_norm": 0.7097840905189514, + "learning_rate": 3.6690753630385436e-06, + "loss": 0.1052, + "step": 1090 + }, + { + "epoch": 2.3654054054054052, + "grad_norm": 0.9387683272361755, + "learning_rate": 3.552827567534186e-06, + "loss": 0.1042, + "step": 1095 + }, + { + "epoch": 2.3762162162162164, + "grad_norm": 0.8780773282051086, + "learning_rate": 3.4382032382439036e-06, + "loss": 0.1113, + "step": 1100 + }, + { + "epoch": 2.387027027027027, + "grad_norm": 0.8522738814353943, + "learning_rate": 3.3252186314726447e-06, + "loss": 0.1118, + "step": 1105 + }, + { + "epoch": 2.3978378378378378, + "grad_norm": 1.181625485420227, + "learning_rate": 3.213889770975881e-06, + "loss": 0.112, + "step": 1110 + }, + { + "epoch": 2.4086486486486485, + "grad_norm": 0.9534328579902649, + "learning_rate": 3.1042324456870514e-06, + "loss": 0.1131, + "step": 1115 + }, + { + "epoch": 2.4194594594594596, + "grad_norm": 0.9331811666488647, + "learning_rate": 2.996262207478356e-06, + "loss": 0.1183, + "step": 1120 + }, + { + "epoch": 2.4302702702702703, + "grad_norm": 0.8687976002693176, + "learning_rate": 2.8899943689551493e-06, + "loss": 0.1185, + "step": 1125 + }, + { + "epoch": 2.441081081081081, + "grad_norm": 0.8371797800064087, + "learning_rate": 2.7854440012842684e-06, + "loss": 0.1027, + "step": 1130 + }, + { + "epoch": 2.4518918918918917, + "grad_norm": 0.8523989319801331, + "learning_rate": 2.6826259320565903e-06, + "loss": 0.0968, + "step": 1135 + }, + { + "epoch": 2.462702702702703, + "grad_norm": 1.2106547355651855, + "learning_rate": 2.581554743184158e-06, + "loss": 0.1248, + "step": 1140 + }, + { + "epoch": 2.4735135135135136, + "grad_norm": 0.9950792789459229, + "learning_rate": 2.482244768832146e-06, + "loss": 0.1204, + "step": 1145 + }, + { + "epoch": 2.4843243243243243, + "grad_norm": 0.9012033939361572, + "learning_rate": 2.384710093385929e-06, + "loss": 0.1033, + "step": 1150 + }, + { + "epoch": 2.495135135135135, + "grad_norm": 0.9354862570762634, + "learning_rate": 2.288964549453633e-06, + "loss": 0.0994, + "step": 1155 + }, + { + "epoch": 2.505945945945946, + "grad_norm": 0.8202235698699951, + "learning_rate": 2.1950217159043197e-06, + "loss": 0.1068, + "step": 1160 + }, + { + "epoch": 2.516756756756757, + "grad_norm": 0.963006317615509, + "learning_rate": 2.102894915942243e-06, + "loss": 0.1048, + "step": 1165 + }, + { + "epoch": 2.5275675675675675, + "grad_norm": 0.9164365530014038, + "learning_rate": 2.0125972152172777e-06, + "loss": 0.1168, + "step": 1170 + }, + { + "epoch": 2.538378378378378, + "grad_norm": 0.8437461853027344, + "learning_rate": 1.924141419971949e-06, + "loss": 0.0888, + "step": 1175 + }, + { + "epoch": 2.5491891891891894, + "grad_norm": 0.9043228626251221, + "learning_rate": 1.8375400752251943e-06, + "loss": 0.0969, + "step": 1180 + }, + { + "epoch": 2.56, + "grad_norm": 0.8771549463272095, + "learning_rate": 1.75280546299322e-06, + "loss": 0.0917, + "step": 1185 + }, + { + "epoch": 2.5708108108108108, + "grad_norm": 0.7359907031059265, + "learning_rate": 1.6699496005476244e-06, + "loss": 0.0895, + "step": 1190 + }, + { + "epoch": 2.581621621621622, + "grad_norm": 0.6724054217338562, + "learning_rate": 1.5889842387110742e-06, + "loss": 0.0999, + "step": 1195 + }, + { + "epoch": 2.5924324324324326, + "grad_norm": 0.9827194809913635, + "learning_rate": 1.5099208601907777e-06, + "loss": 0.099, + "step": 1200 + }, + { + "epoch": 2.6032432432432433, + "grad_norm": 0.8117175102233887, + "learning_rate": 1.4327706779499927e-06, + "loss": 0.0855, + "step": 1205 + }, + { + "epoch": 2.614054054054054, + "grad_norm": 0.8355165123939514, + "learning_rate": 1.3575446336177506e-06, + "loss": 0.091, + "step": 1210 + }, + { + "epoch": 2.6248648648648647, + "grad_norm": 0.8091166019439697, + "learning_rate": 1.2842533959371066e-06, + "loss": 0.1111, + "step": 1215 + }, + { + "epoch": 2.6356756756756754, + "grad_norm": 0.7265312671661377, + "learning_rate": 1.2129073592520685e-06, + "loss": 0.102, + "step": 1220 + }, + { + "epoch": 2.6464864864864865, + "grad_norm": 0.7786859273910522, + "learning_rate": 1.1435166420334436e-06, + "loss": 0.0938, + "step": 1225 + }, + { + "epoch": 2.6572972972972972, + "grad_norm": 0.9470868110656738, + "learning_rate": 1.0760910854438104e-06, + "loss": 0.1011, + "step": 1230 + }, + { + "epoch": 2.668108108108108, + "grad_norm": 1.1094117164611816, + "learning_rate": 1.0106402519418173e-06, + "loss": 0.0956, + "step": 1235 + }, + { + "epoch": 2.678918918918919, + "grad_norm": 0.9084083437919617, + "learning_rate": 9.471734239260288e-07, + "loss": 0.1031, + "step": 1240 + }, + { + "epoch": 2.68972972972973, + "grad_norm": 0.730699360370636, + "learning_rate": 8.856996024184477e-07, + "loss": 0.0934, + "step": 1245 + }, + { + "epoch": 2.7005405405405405, + "grad_norm": 1.029892086982727, + "learning_rate": 8.262275057879926e-07, + "loss": 0.0896, + "step": 1250 + }, + { + "epoch": 2.711351351351351, + "grad_norm": 0.800001859664917, + "learning_rate": 7.6876556851401e-07, + "loss": 0.1, + "step": 1255 + }, + { + "epoch": 2.722162162162162, + "grad_norm": 0.9404628276824951, + "learning_rate": 7.133219399901097e-07, + "loss": 0.0949, + "step": 1260 + }, + { + "epoch": 2.732972972972973, + "grad_norm": 0.7931982278823853, + "learning_rate": 6.599044833683632e-07, + "loss": 0.093, + "step": 1265 + }, + { + "epoch": 2.7437837837837837, + "grad_norm": 0.615695595741272, + "learning_rate": 6.085207744441529e-07, + "loss": 0.0996, + "step": 1270 + }, + { + "epoch": 2.7545945945945944, + "grad_norm": 0.7238854169845581, + "learning_rate": 5.591781005817542e-07, + "loss": 0.0962, + "step": 1275 + }, + { + "epoch": 2.7654054054054056, + "grad_norm": 0.7023536562919617, + "learning_rate": 5.11883459680812e-07, + "loss": 0.1117, + "step": 1280 + }, + { + "epoch": 2.7762162162162163, + "grad_norm": 1.0195461511611938, + "learning_rate": 4.6664355918389244e-07, + "loss": 0.1175, + "step": 1285 + }, + { + "epoch": 2.787027027027027, + "grad_norm": 0.7082456946372986, + "learning_rate": 4.234648151252063e-07, + "loss": 0.0912, + "step": 1290 + }, + { + "epoch": 2.7978378378378377, + "grad_norm": 0.7813063859939575, + "learning_rate": 3.823533512206845e-07, + "loss": 0.0898, + "step": 1295 + }, + { + "epoch": 2.8086486486486484, + "grad_norm": 0.7761955261230469, + "learning_rate": 3.4331499799948484e-07, + "loss": 0.0937, + "step": 1300 + }, + { + "epoch": 2.8194594594594595, + "grad_norm": 0.8479624390602112, + "learning_rate": 3.063552919770984e-07, + "loss": 0.0981, + "step": 1305 + }, + { + "epoch": 2.8302702702702702, + "grad_norm": 0.9345336556434631, + "learning_rate": 2.7147947487014434e-07, + "loss": 0.1039, + "step": 1310 + }, + { + "epoch": 2.841081081081081, + "grad_norm": 0.7568656206130981, + "learning_rate": 2.38692492852986e-07, + "loss": 0.0912, + "step": 1315 + }, + { + "epoch": 2.851891891891892, + "grad_norm": 0.6731147170066833, + "learning_rate": 2.0799899585623894e-07, + "loss": 0.1026, + "step": 1320 + }, + { + "epoch": 2.8627027027027028, + "grad_norm": 0.8383321762084961, + "learning_rate": 1.7940333690731004e-07, + "loss": 0.0961, + "step": 1325 + }, + { + "epoch": 2.8735135135135135, + "grad_norm": 0.7748822569847107, + "learning_rate": 1.5290957151304795e-07, + "loss": 0.091, + "step": 1330 + }, + { + "epoch": 2.884324324324324, + "grad_norm": 0.9053775668144226, + "learning_rate": 1.2852145708457498e-07, + "loss": 0.0967, + "step": 1335 + }, + { + "epoch": 2.895135135135135, + "grad_norm": 0.7281140089035034, + "learning_rate": 1.0624245240439811e-07, + "loss": 0.0962, + "step": 1340 + }, + { + "epoch": 2.905945945945946, + "grad_norm": 0.822816789150238, + "learning_rate": 8.607571713588502e-08, + "loss": 0.1072, + "step": 1345 + }, + { + "epoch": 2.9167567567567567, + "grad_norm": 0.7851207852363586, + "learning_rate": 6.802411137514897e-08, + "loss": 0.0872, + "step": 1350 + }, + { + "epoch": 2.9275675675675674, + "grad_norm": 0.8905292749404907, + "learning_rate": 5.20901952454167e-08, + "loss": 0.0942, + "step": 1355 + }, + { + "epoch": 2.9383783783783786, + "grad_norm": 0.8317160606384277, + "learning_rate": 3.827622853395551e-08, + "loss": 0.1042, + "step": 1360 + }, + { + "epoch": 2.9491891891891893, + "grad_norm": 0.7699822187423706, + "learning_rate": 2.6584170371578008e-08, + "loss": 0.1014, + "step": 1365 + }, + { + "epoch": 2.96, + "grad_norm": 0.6866990327835083, + "learning_rate": 1.701567895479761e-08, + "loss": 0.1058, + "step": 1370 + }, + { + "epoch": 2.9708108108108107, + "grad_norm": 0.7766857743263245, + "learning_rate": 9.572111310653387e-09, + "loss": 0.1068, + "step": 1375 + }, + { + "epoch": 2.9816216216216214, + "grad_norm": 0.746891438961029, + "learning_rate": 4.254523104260666e-09, + "loss": 0.0935, + "step": 1380 + }, + { + "epoch": 2.9924324324324325, + "grad_norm": 0.8083899021148682, + "learning_rate": 1.0636684890874548e-09, + "loss": 0.0987, + "step": 1385 + }, + { + "epoch": 3.0, + "step": 1389, + "total_flos": 1.8970279091854377e+18, + "train_loss": 0.46760529968445036, + "train_runtime": 1488.4997, + "train_samples_per_second": 29.819, + "train_steps_per_second": 0.933 + } + ], + "logging_steps": 5, + "max_steps": 1389, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8970279091854377e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a168b40d0708cc15b5b569a02d4c0dc768106fd6 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2cd5727ab1a2872e7f10eb6f046c26c9bb85fbe7ae55593989968f6c01fd7ac +size 8273 diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..704b4717b76da661b056084a1298a44761a43c00 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 1_128_e3_3e-5 + results: [] +--- + + + +# 1_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a30ec5be6a1b7e452c6e6447deced57c00a15afd --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "o_proj", + "v_proj", + "q_proj", + "down_proj", + "k_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..474874e4e0d926375899315bf89e14ce89f5e822 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec715e4a7f4e054ab8123d033791e37375c0fb9c7ba4320c3ce4dfc9a8381c2 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..859a93c110353d746b46f386aa399299f7450ee8 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.508233453999489e+18, + "train_loss": 0.43455437022609245, + "train_runtime": 1182.767, + "train_samples": 12174, + "train_samples_per_second": 30.878, + "train_steps_per_second": 0.966 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/chat_template.jinja b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..859a93c110353d746b46f386aa399299f7450ee8 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.508233453999489e+18, + "train_loss": 0.43455437022609245, + "train_runtime": 1182.767, + "train_samples": 12174, + "train_samples_per_second": 30.878, + "train_steps_per_second": 0.966 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..451ba5c6d86215b0be1cafe21237ab8a796fabf7 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1639 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1143, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013140604467805518, + "grad_norm": 0.586513876914978, + "learning_rate": 2.068965517241379e-06, + "loss": 1.5571, + "step": 5 + }, + { + "epoch": 0.026281208935611037, + "grad_norm": 0.533902645111084, + "learning_rate": 4.655172413793104e-06, + "loss": 1.6174, + "step": 10 + }, + { + "epoch": 0.03942181340341656, + "grad_norm": 0.5319889783859253, + "learning_rate": 7.241379310344828e-06, + "loss": 1.6276, + "step": 15 + }, + { + "epoch": 0.052562417871222074, + "grad_norm": 0.4428861141204834, + "learning_rate": 9.827586206896551e-06, + "loss": 1.5638, + "step": 20 + }, + { + "epoch": 0.0657030223390276, + "grad_norm": 0.5029926896095276, + "learning_rate": 1.2413793103448277e-05, + "loss": 1.587, + "step": 25 + }, + { + "epoch": 0.07884362680683311, + "grad_norm": 0.4494924545288086, + "learning_rate": 1.5e-05, + "loss": 1.5198, + "step": 30 + }, + { + "epoch": 0.09198423127463863, + "grad_norm": 0.46659615635871887, + "learning_rate": 1.7586206896551724e-05, + "loss": 1.472, + "step": 35 + }, + { + "epoch": 0.10512483574244415, + "grad_norm": 0.4505350887775421, + "learning_rate": 2.017241379310345e-05, + "loss": 1.4877, + "step": 40 + }, + { + "epoch": 0.11826544021024968, + "grad_norm": 0.4819447994232178, + "learning_rate": 2.275862068965517e-05, + "loss": 1.5074, + "step": 45 + }, + { + "epoch": 0.1314060446780552, + "grad_norm": 0.4871845245361328, + "learning_rate": 2.5344827586206897e-05, + "loss": 1.4357, + "step": 50 + }, + { + "epoch": 0.1445466491458607, + "grad_norm": 0.4965895116329193, + "learning_rate": 2.793103448275862e-05, + "loss": 1.4069, + "step": 55 + }, + { + "epoch": 0.15768725361366623, + "grad_norm": 0.4973182678222656, + "learning_rate": 2.999993712163665e-05, + "loss": 1.4141, + "step": 60 + }, + { + "epoch": 0.17082785808147175, + "grad_norm": 0.6018816828727722, + "learning_rate": 2.9997736434270605e-05, + "loss": 1.3601, + "step": 65 + }, + { + "epoch": 0.18396846254927726, + "grad_norm": 0.5405107736587524, + "learning_rate": 2.9992392355875752e-05, + "loss": 1.3867, + "step": 70 + }, + { + "epoch": 0.19710906701708278, + "grad_norm": 0.6256355047225952, + "learning_rate": 2.9983906006522986e-05, + "loss": 1.2759, + "step": 75 + }, + { + "epoch": 0.2102496714848883, + "grad_norm": 0.6459932923316956, + "learning_rate": 2.9972279164875014e-05, + "loss": 1.2958, + "step": 80 + }, + { + "epoch": 0.2233902759526938, + "grad_norm": 0.6749354004859924, + "learning_rate": 2.9957514267813553e-05, + "loss": 1.2952, + "step": 85 + }, + { + "epoch": 0.23653088042049936, + "grad_norm": 0.6154540181159973, + "learning_rate": 2.993961440992859e-05, + "loss": 1.2815, + "step": 90 + }, + { + "epoch": 0.24967148488830487, + "grad_norm": 0.6539896726608276, + "learning_rate": 2.9918583342869767e-05, + "loss": 1.21, + "step": 95 + }, + { + "epoch": 0.2628120893561104, + "grad_norm": 0.5965717434883118, + "learning_rate": 2.989442547456011e-05, + "loss": 1.2336, + "step": 100 + }, + { + "epoch": 0.2759526938239159, + "grad_norm": 0.9007426500320435, + "learning_rate": 2.986714586827211e-05, + "loss": 1.1539, + "step": 105 + }, + { + "epoch": 0.2890932982917214, + "grad_norm": 0.7696933150291443, + "learning_rate": 2.9836750241566558e-05, + "loss": 1.1396, + "step": 110 + }, + { + "epoch": 0.30223390275952694, + "grad_norm": 0.9050470590591431, + "learning_rate": 2.9803244965094165e-05, + "loss": 1.1162, + "step": 115 + }, + { + "epoch": 0.31537450722733246, + "grad_norm": 0.7243179678916931, + "learning_rate": 2.976663706126034e-05, + "loss": 1.0982, + "step": 120 + }, + { + "epoch": 0.328515111695138, + "grad_norm": 0.7988813519477844, + "learning_rate": 2.972693420275336e-05, + "loss": 1.1248, + "step": 125 + }, + { + "epoch": 0.3416557161629435, + "grad_norm": 0.7480372190475464, + "learning_rate": 2.9684144710936236e-05, + "loss": 1.1265, + "step": 130 + }, + { + "epoch": 0.354796320630749, + "grad_norm": 0.7815009951591492, + "learning_rate": 2.9638277554102636e-05, + "loss": 1.091, + "step": 135 + }, + { + "epoch": 0.3679369250985545, + "grad_norm": 0.7465618252754211, + "learning_rate": 2.9589342345597218e-05, + "loss": 1.0527, + "step": 140 + }, + { + "epoch": 0.38107752956636004, + "grad_norm": 0.8377552628517151, + "learning_rate": 2.953734934180073e-05, + "loss": 0.9904, + "step": 145 + }, + { + "epoch": 0.39421813403416556, + "grad_norm": 0.833862841129303, + "learning_rate": 2.9482309439980404e-05, + "loss": 0.9304, + "step": 150 + }, + { + "epoch": 0.4073587385019711, + "grad_norm": 0.8369646072387695, + "learning_rate": 2.9424234176005957e-05, + "loss": 1.0454, + "step": 155 + }, + { + "epoch": 0.4204993429697766, + "grad_norm": 1.02126944065094, + "learning_rate": 2.9363135721931798e-05, + "loss": 0.9863, + "step": 160 + }, + { + "epoch": 0.4336399474375821, + "grad_norm": 0.8830117583274841, + "learning_rate": 2.9299026883445846e-05, + "loss": 0.9718, + "step": 165 + }, + { + "epoch": 0.4467805519053876, + "grad_norm": 0.9518627524375916, + "learning_rate": 2.9231921097185604e-05, + "loss": 0.9026, + "step": 170 + }, + { + "epoch": 0.45992115637319314, + "grad_norm": 0.8812270164489746, + "learning_rate": 2.916183242792194e-05, + "loss": 0.9655, + "step": 175 + }, + { + "epoch": 0.4730617608409987, + "grad_norm": 1.056344985961914, + "learning_rate": 2.9088775565611248e-05, + "loss": 0.883, + "step": 180 + }, + { + "epoch": 0.48620236530880423, + "grad_norm": 0.9771806001663208, + "learning_rate": 2.901276582231656e-05, + "loss": 0.8812, + "step": 185 + }, + { + "epoch": 0.49934296977660975, + "grad_norm": 0.9659188389778137, + "learning_rate": 2.893381912899826e-05, + "loss": 0.8719, + "step": 190 + }, + { + "epoch": 0.5124835742444153, + "grad_norm": 1.0803059339523315, + "learning_rate": 2.8851952032175136e-05, + "loss": 0.8244, + "step": 195 + }, + { + "epoch": 0.5256241787122208, + "grad_norm": 1.19648277759552, + "learning_rate": 2.8767181690456345e-05, + "loss": 0.8764, + "step": 200 + }, + { + "epoch": 0.5387647831800263, + "grad_norm": 1.0102143287658691, + "learning_rate": 2.867952587094512e-05, + "loss": 0.811, + "step": 205 + }, + { + "epoch": 0.5519053876478318, + "grad_norm": 1.0186610221862793, + "learning_rate": 2.8589002945514987e-05, + "loss": 0.8262, + "step": 210 + }, + { + "epoch": 0.5650459921156373, + "grad_norm": 1.043400526046753, + "learning_rate": 2.8495631886959126e-05, + "loss": 0.7652, + "step": 215 + }, + { + "epoch": 0.5781865965834428, + "grad_norm": 1.020921230316162, + "learning_rate": 2.8399432265013887e-05, + "loss": 0.7091, + "step": 220 + }, + { + "epoch": 0.5913272010512484, + "grad_norm": 1.0588611364364624, + "learning_rate": 2.8300424242257125e-05, + "loss": 0.7674, + "step": 225 + }, + { + "epoch": 0.6044678055190539, + "grad_norm": 1.0992854833602905, + "learning_rate": 2.8198628569882328e-05, + "loss": 0.7668, + "step": 230 + }, + { + "epoch": 0.6176084099868594, + "grad_norm": 1.1667803525924683, + "learning_rate": 2.809406658334933e-05, + "loss": 0.7816, + "step": 235 + }, + { + "epoch": 0.6307490144546649, + "grad_norm": 1.0998116731643677, + "learning_rate": 2.7986760197912594e-05, + "loss": 0.7788, + "step": 240 + }, + { + "epoch": 0.6438896189224704, + "grad_norm": 1.1146529912948608, + "learning_rate": 2.7876731904027994e-05, + "loss": 0.7551, + "step": 245 + }, + { + "epoch": 0.657030223390276, + "grad_norm": 1.1962319612503052, + "learning_rate": 2.7764004762638977e-05, + "loss": 0.6632, + "step": 250 + }, + { + "epoch": 0.6701708278580815, + "grad_norm": 1.0639499425888062, + "learning_rate": 2.7648602400343235e-05, + "loss": 0.6962, + "step": 255 + }, + { + "epoch": 0.683311432325887, + "grad_norm": 1.0667399168014526, + "learning_rate": 2.7530549004440757e-05, + "loss": 0.7034, + "step": 260 + }, + { + "epoch": 0.6964520367936925, + "grad_norm": 1.203701376914978, + "learning_rate": 2.7409869317864406e-05, + "loss": 0.654, + "step": 265 + }, + { + "epoch": 0.709592641261498, + "grad_norm": 1.1686607599258423, + "learning_rate": 2.7286588633994e-05, + "loss": 0.6795, + "step": 270 + }, + { + "epoch": 0.7227332457293035, + "grad_norm": 1.2345540523529053, + "learning_rate": 2.7160732791355076e-05, + "loss": 0.6258, + "step": 275 + }, + { + "epoch": 0.735873850197109, + "grad_norm": 1.0722647905349731, + "learning_rate": 2.7032328168203327e-05, + "loss": 0.6623, + "step": 280 + }, + { + "epoch": 0.7490144546649146, + "grad_norm": 1.0939661264419556, + "learning_rate": 2.6901401676996e-05, + "loss": 0.6371, + "step": 285 + }, + { + "epoch": 0.7621550591327201, + "grad_norm": 1.1326231956481934, + "learning_rate": 2.6767980758751264e-05, + "loss": 0.6187, + "step": 290 + }, + { + "epoch": 0.7752956636005256, + "grad_norm": 1.1895264387130737, + "learning_rate": 2.6632093377296796e-05, + "loss": 0.6695, + "step": 295 + }, + { + "epoch": 0.7884362680683311, + "grad_norm": 1.3408381938934326, + "learning_rate": 2.649376801340887e-05, + "loss": 0.5647, + "step": 300 + }, + { + "epoch": 0.8015768725361366, + "grad_norm": 1.3603100776672363, + "learning_rate": 2.6353033658842996e-05, + "loss": 0.5942, + "step": 305 + }, + { + "epoch": 0.8147174770039421, + "grad_norm": 1.303463339805603, + "learning_rate": 2.6209919810257514e-05, + "loss": 0.556, + "step": 310 + }, + { + "epoch": 0.8278580814717477, + "grad_norm": 1.1944950819015503, + "learning_rate": 2.606445646303138e-05, + "loss": 0.5837, + "step": 315 + }, + { + "epoch": 0.8409986859395532, + "grad_norm": 1.2227811813354492, + "learning_rate": 2.591667410497738e-05, + "loss": 0.5795, + "step": 320 + }, + { + "epoch": 0.8541392904073587, + "grad_norm": 1.2282943725585938, + "learning_rate": 2.5766603709952184e-05, + "loss": 0.5609, + "step": 325 + }, + { + "epoch": 0.8672798948751642, + "grad_norm": 1.0757263898849487, + "learning_rate": 2.561427673136446e-05, + "loss": 0.5402, + "step": 330 + }, + { + "epoch": 0.8804204993429697, + "grad_norm": 1.1933873891830444, + "learning_rate": 2.5459725095582577e-05, + "loss": 0.584, + "step": 335 + }, + { + "epoch": 0.8935611038107752, + "grad_norm": 1.1297672986984253, + "learning_rate": 2.5302981195243083e-05, + "loss": 0.5652, + "step": 340 + }, + { + "epoch": 0.9067017082785808, + "grad_norm": 1.2068443298339844, + "learning_rate": 2.5144077882461516e-05, + "loss": 0.5153, + "step": 345 + }, + { + "epoch": 0.9198423127463863, + "grad_norm": 1.229136347770691, + "learning_rate": 2.4983048461946893e-05, + "loss": 0.489, + "step": 350 + }, + { + "epoch": 0.9329829172141918, + "grad_norm": 1.1409878730773926, + "learning_rate": 2.4819926684021342e-05, + "loss": 0.5024, + "step": 355 + }, + { + "epoch": 0.9461235216819974, + "grad_norm": 1.2169917821884155, + "learning_rate": 2.4654746737546328e-05, + "loss": 0.4877, + "step": 360 + }, + { + "epoch": 0.9592641261498029, + "grad_norm": 1.30276358127594, + "learning_rate": 2.4487543242756993e-05, + "loss": 0.5215, + "step": 365 + }, + { + "epoch": 0.9724047306176085, + "grad_norm": 1.1544766426086426, + "learning_rate": 2.4318351244006055e-05, + "loss": 0.5184, + "step": 370 + }, + { + "epoch": 0.985545335085414, + "grad_norm": 1.370827078819275, + "learning_rate": 2.4147206202418812e-05, + "loss": 0.4949, + "step": 375 + }, + { + "epoch": 0.9986859395532195, + "grad_norm": 1.1653227806091309, + "learning_rate": 2.3974143988460838e-05, + "loss": 0.4295, + "step": 380 + }, + { + "epoch": 1.0105124835742445, + "grad_norm": 1.1515429019927979, + "learning_rate": 2.3799200874419827e-05, + "loss": 0.4086, + "step": 385 + }, + { + "epoch": 1.02365308804205, + "grad_norm": 1.2612224817276, + "learning_rate": 2.3622413526803273e-05, + "loss": 0.3824, + "step": 390 + }, + { + "epoch": 1.0367936925098555, + "grad_norm": 1.469792366027832, + "learning_rate": 2.3443818998653464e-05, + "loss": 0.4001, + "step": 395 + }, + { + "epoch": 1.049934296977661, + "grad_norm": 1.4257794618606567, + "learning_rate": 2.3263454721781537e-05, + "loss": 0.4738, + "step": 400 + }, + { + "epoch": 1.0630749014454666, + "grad_norm": 1.1773842573165894, + "learning_rate": 2.308135849892208e-05, + "loss": 0.4543, + "step": 405 + }, + { + "epoch": 1.076215505913272, + "grad_norm": 1.283363699913025, + "learning_rate": 2.2897568495810022e-05, + "loss": 0.3303, + "step": 410 + }, + { + "epoch": 1.0893561103810776, + "grad_norm": 1.3454755544662476, + "learning_rate": 2.271212323318144e-05, + "loss": 0.4078, + "step": 415 + }, + { + "epoch": 1.1024967148488831, + "grad_norm": 1.345389723777771, + "learning_rate": 2.2525061578699962e-05, + "loss": 0.3701, + "step": 420 + }, + { + "epoch": 1.1156373193166886, + "grad_norm": 1.3119934797286987, + "learning_rate": 2.233642273881045e-05, + "loss": 0.356, + "step": 425 + }, + { + "epoch": 1.1287779237844942, + "grad_norm": 1.326712965965271, + "learning_rate": 2.2146246250521677e-05, + "loss": 0.347, + "step": 430 + }, + { + "epoch": 1.1419185282522997, + "grad_norm": 1.192302942276001, + "learning_rate": 2.1954571973119726e-05, + "loss": 0.383, + "step": 435 + }, + { + "epoch": 1.1550591327201052, + "grad_norm": 1.3372474908828735, + "learning_rate": 2.1761440079813845e-05, + "loss": 0.378, + "step": 440 + }, + { + "epoch": 1.1681997371879107, + "grad_norm": 1.2026573419570923, + "learning_rate": 2.1566891049316515e-05, + "loss": 0.3622, + "step": 445 + }, + { + "epoch": 1.1813403416557162, + "grad_norm": 1.3361687660217285, + "learning_rate": 2.137096565735943e-05, + "loss": 0.3598, + "step": 450 + }, + { + "epoch": 1.1944809461235217, + "grad_norm": 1.2036209106445312, + "learning_rate": 2.1173704968147327e-05, + "loss": 0.3138, + "step": 455 + }, + { + "epoch": 1.2076215505913273, + "grad_norm": 1.2720439434051514, + "learning_rate": 2.0975150325751262e-05, + "loss": 0.357, + "step": 460 + }, + { + "epoch": 1.2207621550591328, + "grad_norm": 1.0917346477508545, + "learning_rate": 2.0775343345443267e-05, + "loss": 0.3252, + "step": 465 + }, + { + "epoch": 1.2339027595269383, + "grad_norm": 1.2158228158950806, + "learning_rate": 2.057432590497418e-05, + "loss": 0.3234, + "step": 470 + }, + { + "epoch": 1.2470433639947438, + "grad_norm": 1.3728653192520142, + "learning_rate": 2.0372140135796407e-05, + "loss": 0.3639, + "step": 475 + }, + { + "epoch": 1.2601839684625493, + "grad_norm": 1.1484280824661255, + "learning_rate": 2.01688284142336e-05, + "loss": 0.3388, + "step": 480 + }, + { + "epoch": 1.2733245729303548, + "grad_norm": 1.2093602418899536, + "learning_rate": 1.9964433352598913e-05, + "loss": 0.3178, + "step": 485 + }, + { + "epoch": 1.2864651773981604, + "grad_norm": 1.1402921676635742, + "learning_rate": 1.975899779026386e-05, + "loss": 0.3433, + "step": 490 + }, + { + "epoch": 1.2996057818659659, + "grad_norm": 1.263271689414978, + "learning_rate": 1.955256478467959e-05, + "loss": 0.2963, + "step": 495 + }, + { + "epoch": 1.3127463863337714, + "grad_norm": 1.3957903385162354, + "learning_rate": 1.9345177602352386e-05, + "loss": 0.3131, + "step": 500 + }, + { + "epoch": 1.325886990801577, + "grad_norm": 1.1427184343338013, + "learning_rate": 1.9136879709775424e-05, + "loss": 0.3166, + "step": 505 + }, + { + "epoch": 1.3390275952693824, + "grad_norm": 1.233068585395813, + "learning_rate": 1.8927714764318588e-05, + "loss": 0.3267, + "step": 510 + }, + { + "epoch": 1.352168199737188, + "grad_norm": 1.333728313446045, + "learning_rate": 1.871772660507826e-05, + "loss": 0.304, + "step": 515 + }, + { + "epoch": 1.3653088042049935, + "grad_norm": 1.247043490409851, + "learning_rate": 1.8506959243689043e-05, + "loss": 0.2816, + "step": 520 + }, + { + "epoch": 1.378449408672799, + "grad_norm": 1.1557133197784424, + "learning_rate": 1.829545685509934e-05, + "loss": 0.2996, + "step": 525 + }, + { + "epoch": 1.3915900131406045, + "grad_norm": 1.2661833763122559, + "learning_rate": 1.8083263768312645e-05, + "loss": 0.2871, + "step": 530 + }, + { + "epoch": 1.40473061760841, + "grad_norm": 1.223533034324646, + "learning_rate": 1.7870424457096593e-05, + "loss": 0.2647, + "step": 535 + }, + { + "epoch": 1.4178712220762155, + "grad_norm": 1.2508898973464966, + "learning_rate": 1.765698353066169e-05, + "loss": 0.2396, + "step": 540 + }, + { + "epoch": 1.431011826544021, + "grad_norm": 1.2196027040481567, + "learning_rate": 1.7442985724311566e-05, + "loss": 0.2816, + "step": 545 + }, + { + "epoch": 1.4441524310118266, + "grad_norm": 1.3485265970230103, + "learning_rate": 1.7228475890066908e-05, + "loss": 0.3036, + "step": 550 + }, + { + "epoch": 1.457293035479632, + "grad_norm": 1.241176962852478, + "learning_rate": 1.7013498987264832e-05, + "loss": 0.283, + "step": 555 + }, + { + "epoch": 1.4704336399474376, + "grad_norm": 1.3604639768600464, + "learning_rate": 1.6798100073135865e-05, + "loss": 0.2597, + "step": 560 + }, + { + "epoch": 1.483574244415243, + "grad_norm": 1.1002471446990967, + "learning_rate": 1.6582324293360298e-05, + "loss": 0.2531, + "step": 565 + }, + { + "epoch": 1.4967148488830486, + "grad_norm": 1.2491205930709839, + "learning_rate": 1.6366216872606098e-05, + "loss": 0.2536, + "step": 570 + }, + { + "epoch": 1.5098554533508541, + "grad_norm": 1.2918310165405273, + "learning_rate": 1.6149823105050187e-05, + "loss": 0.2454, + "step": 575 + }, + { + "epoch": 1.5229960578186597, + "grad_norm": 1.3254965543746948, + "learning_rate": 1.5933188344885232e-05, + "loss": 0.2788, + "step": 580 + }, + { + "epoch": 1.5361366622864652, + "grad_norm": 1.2959513664245605, + "learning_rate": 1.5716357996813773e-05, + "loss": 0.2435, + "step": 585 + }, + { + "epoch": 1.5492772667542707, + "grad_norm": 1.3010622262954712, + "learning_rate": 1.5499377506531818e-05, + "loss": 0.2348, + "step": 590 + }, + { + "epoch": 1.5624178712220762, + "grad_norm": 1.2571618556976318, + "learning_rate": 1.5282292351203847e-05, + "loss": 0.2424, + "step": 595 + }, + { + "epoch": 1.5755584756898817, + "grad_norm": 1.1237596273422241, + "learning_rate": 1.5065148029931195e-05, + "loss": 0.2652, + "step": 600 + }, + { + "epoch": 1.5886990801576872, + "grad_norm": 1.233609914779663, + "learning_rate": 1.484799005421584e-05, + "loss": 0.2269, + "step": 605 + }, + { + "epoch": 1.6018396846254928, + "grad_norm": 1.0817575454711914, + "learning_rate": 1.4630863938421603e-05, + "loss": 0.2586, + "step": 610 + }, + { + "epoch": 1.6149802890932983, + "grad_norm": 1.4082614183425903, + "learning_rate": 1.4413815190234777e-05, + "loss": 0.2476, + "step": 615 + }, + { + "epoch": 1.6281208935611038, + "grad_norm": 1.1206270456314087, + "learning_rate": 1.419688930112607e-05, + "loss": 0.2626, + "step": 620 + }, + { + "epoch": 1.6412614980289093, + "grad_norm": 1.4617871046066284, + "learning_rate": 1.3980131736816048e-05, + "loss": 0.229, + "step": 625 + }, + { + "epoch": 1.6544021024967148, + "grad_norm": 1.1052241325378418, + "learning_rate": 1.3763587927745898e-05, + "loss": 0.2169, + "step": 630 + }, + { + "epoch": 1.6675427069645203, + "grad_norm": 1.1910312175750732, + "learning_rate": 1.3547303259555625e-05, + "loss": 0.2216, + "step": 635 + }, + { + "epoch": 1.6806833114323259, + "grad_norm": 1.2168457508087158, + "learning_rate": 1.3331323063571647e-05, + "loss": 0.2389, + "step": 640 + }, + { + "epoch": 1.6938239159001314, + "grad_norm": 1.1851553916931152, + "learning_rate": 1.3115692607305718e-05, + "loss": 0.2032, + "step": 645 + }, + { + "epoch": 1.7069645203679369, + "grad_norm": 1.2295221090316772, + "learning_rate": 1.2900457084967302e-05, + "loss": 0.231, + "step": 650 + }, + { + "epoch": 1.7201051248357424, + "grad_norm": 1.4004381895065308, + "learning_rate": 1.2685661607991238e-05, + "loss": 0.2217, + "step": 655 + }, + { + "epoch": 1.733245729303548, + "grad_norm": 1.148106336593628, + "learning_rate": 1.2471351195582811e-05, + "loss": 0.1803, + "step": 660 + }, + { + "epoch": 1.7463863337713534, + "grad_norm": 1.1939607858657837, + "learning_rate": 1.2257570765282127e-05, + "loss": 0.1992, + "step": 665 + }, + { + "epoch": 1.759526938239159, + "grad_norm": 1.0682413578033447, + "learning_rate": 1.2044365123549791e-05, + "loss": 0.1998, + "step": 670 + }, + { + "epoch": 1.7726675427069645, + "grad_norm": 1.1572526693344116, + "learning_rate": 1.183177895637589e-05, + "loss": 0.2196, + "step": 675 + }, + { + "epoch": 1.78580814717477, + "grad_norm": 1.3342459201812744, + "learning_rate": 1.1619856819914186e-05, + "loss": 0.212, + "step": 680 + }, + { + "epoch": 1.7989487516425755, + "grad_norm": 1.1775377988815308, + "learning_rate": 1.1408643131143566e-05, + "loss": 0.1856, + "step": 685 + }, + { + "epoch": 1.812089356110381, + "grad_norm": 1.9276851415634155, + "learning_rate": 1.1198182158558638e-05, + "loss": 0.1762, + "step": 690 + }, + { + "epoch": 1.8252299605781865, + "grad_norm": 1.1809487342834473, + "learning_rate": 1.098851801289144e-05, + "loss": 0.1918, + "step": 695 + }, + { + "epoch": 1.838370565045992, + "grad_norm": 1.2131558656692505, + "learning_rate": 1.0779694637866257e-05, + "loss": 0.1617, + "step": 700 + }, + { + "epoch": 1.8515111695137976, + "grad_norm": 1.2316120862960815, + "learning_rate": 1.0571755800989367e-05, + "loss": 0.1764, + "step": 705 + }, + { + "epoch": 1.864651773981603, + "grad_norm": 1.2769358158111572, + "learning_rate": 1.036474508437579e-05, + "loss": 0.1853, + "step": 710 + }, + { + "epoch": 1.8777923784494086, + "grad_norm": 1.2241487503051758, + "learning_rate": 1.0158705875614877e-05, + "loss": 0.1573, + "step": 715 + }, + { + "epoch": 1.8909329829172141, + "grad_norm": 1.211738109588623, + "learning_rate": 9.953681358676622e-06, + "loss": 0.1625, + "step": 720 + }, + { + "epoch": 1.9040735873850196, + "grad_norm": 1.2510522603988647, + "learning_rate": 9.749714504860753e-06, + "loss": 0.1633, + "step": 725 + }, + { + "epoch": 1.9172141918528252, + "grad_norm": 1.4170050621032715, + "learning_rate": 9.54684806379026e-06, + "loss": 0.177, + "step": 730 + }, + { + "epoch": 1.9303547963206307, + "grad_norm": 1.1775113344192505, + "learning_rate": 9.345124554451506e-06, + "loss": 0.1589, + "step": 735 + }, + { + "epoch": 1.9434954007884362, + "grad_norm": 1.253719449043274, + "learning_rate": 9.144586256282619e-06, + "loss": 0.1576, + "step": 740 + }, + { + "epoch": 1.9566360052562417, + "grad_norm": 1.3775147199630737, + "learning_rate": 8.945275200312085e-06, + "loss": 0.1575, + "step": 745 + }, + { + "epoch": 1.9697766097240472, + "grad_norm": 1.0718121528625488, + "learning_rate": 8.747233160349434e-06, + "loss": 0.1928, + "step": 750 + }, + { + "epoch": 1.9829172141918527, + "grad_norm": 1.1574039459228516, + "learning_rate": 8.550501644229846e-06, + "loss": 0.1623, + "step": 755 + }, + { + "epoch": 1.9960578186596583, + "grad_norm": 1.1189444065093994, + "learning_rate": 8.355121885114439e-06, + "loss": 0.148, + "step": 760 + }, + { + "epoch": 2.0078843626806835, + "grad_norm": 0.9723482728004456, + "learning_rate": 8.161134832848186e-06, + "loss": 0.1396, + "step": 765 + }, + { + "epoch": 2.021024967148489, + "grad_norm": 0.999653697013855, + "learning_rate": 7.968581145377205e-06, + "loss": 0.1453, + "step": 770 + }, + { + "epoch": 2.0341655716162945, + "grad_norm": 1.0044395923614502, + "learning_rate": 7.777501180227199e-06, + "loss": 0.1299, + "step": 775 + }, + { + "epoch": 2.0473061760841, + "grad_norm": 1.0557458400726318, + "learning_rate": 7.587934986044916e-06, + "loss": 0.1285, + "step": 780 + }, + { + "epoch": 2.0604467805519056, + "grad_norm": 1.3885235786437988, + "learning_rate": 7.3999222942042635e-06, + "loss": 0.1304, + "step": 785 + }, + { + "epoch": 2.073587385019711, + "grad_norm": 0.9794416427612305, + "learning_rate": 7.213502510478993e-06, + "loss": 0.1054, + "step": 790 + }, + { + "epoch": 2.0867279894875166, + "grad_norm": 1.004404902458191, + "learning_rate": 7.028714706783626e-06, + "loss": 0.1155, + "step": 795 + }, + { + "epoch": 2.099868593955322, + "grad_norm": 0.935571014881134, + "learning_rate": 6.845597612984288e-06, + "loss": 0.1257, + "step": 800 + }, + { + "epoch": 2.1130091984231276, + "grad_norm": 0.9108352661132812, + "learning_rate": 6.664189608781295e-06, + "loss": 0.1367, + "step": 805 + }, + { + "epoch": 2.126149802890933, + "grad_norm": 1.2964626550674438, + "learning_rate": 6.484528715665131e-06, + "loss": 0.1173, + "step": 810 + }, + { + "epoch": 2.1392904073587387, + "grad_norm": 0.8613879084587097, + "learning_rate": 6.306652588947454e-06, + "loss": 0.1155, + "step": 815 + }, + { + "epoch": 2.152431011826544, + "grad_norm": 1.0308681726455688, + "learning_rate": 6.130598509868895e-06, + "loss": 0.1088, + "step": 820 + }, + { + "epoch": 2.1655716162943497, + "grad_norm": 0.7728627920150757, + "learning_rate": 5.95640337778525e-06, + "loss": 0.1248, + "step": 825 + }, + { + "epoch": 2.178712220762155, + "grad_norm": 0.9386959671974182, + "learning_rate": 5.784103702433685e-06, + "loss": 0.1135, + "step": 830 + }, + { + "epoch": 2.1918528252299607, + "grad_norm": 1.0817430019378662, + "learning_rate": 5.613735596280661e-06, + "loss": 0.1242, + "step": 835 + }, + { + "epoch": 2.2049934296977662, + "grad_norm": 1.3047266006469727, + "learning_rate": 5.445334766953037e-06, + "loss": 0.1178, + "step": 840 + }, + { + "epoch": 2.2181340341655718, + "grad_norm": 0.9140284061431885, + "learning_rate": 5.278936509754112e-06, + "loss": 0.1215, + "step": 845 + }, + { + "epoch": 2.2312746386333773, + "grad_norm": 1.1054766178131104, + "learning_rate": 5.114575700266024e-06, + "loss": 0.1083, + "step": 850 + }, + { + "epoch": 2.244415243101183, + "grad_norm": 0.8340334892272949, + "learning_rate": 4.95228678704014e-06, + "loss": 0.1018, + "step": 855 + }, + { + "epoch": 2.2575558475689883, + "grad_norm": 1.0238512754440308, + "learning_rate": 4.7921037843769614e-06, + "loss": 0.106, + "step": 860 + }, + { + "epoch": 2.270696452036794, + "grad_norm": 1.0370928049087524, + "learning_rate": 4.6340602651970304e-06, + "loss": 0.1059, + "step": 865 + }, + { + "epoch": 2.2838370565045993, + "grad_norm": 1.3566575050354004, + "learning_rate": 4.478189354004334e-06, + "loss": 0.1231, + "step": 870 + }, + { + "epoch": 2.296977660972405, + "grad_norm": 0.9385572075843811, + "learning_rate": 4.324523719943716e-06, + "loss": 0.1256, + "step": 875 + }, + { + "epoch": 2.3101182654402104, + "grad_norm": 0.8618457913398743, + "learning_rate": 4.173095569953708e-06, + "loss": 0.0945, + "step": 880 + }, + { + "epoch": 2.323258869908016, + "grad_norm": 0.9638295769691467, + "learning_rate": 4.023936642016266e-06, + "loss": 0.1168, + "step": 885 + }, + { + "epoch": 2.3363994743758214, + "grad_norm": 0.9240545630455017, + "learning_rate": 3.87707819850474e-06, + "loss": 0.1094, + "step": 890 + }, + { + "epoch": 2.349540078843627, + "grad_norm": 1.1754658222198486, + "learning_rate": 3.7325510196315964e-06, + "loss": 0.1032, + "step": 895 + }, + { + "epoch": 2.3626806833114324, + "grad_norm": 0.7762131690979004, + "learning_rate": 3.5903853969971335e-06, + "loss": 0.0826, + "step": 900 + }, + { + "epoch": 2.375821287779238, + "grad_norm": 0.9930109977722168, + "learning_rate": 3.450611127240646e-06, + "loss": 0.0981, + "step": 905 + }, + { + "epoch": 2.3889618922470435, + "grad_norm": 0.9907879829406738, + "learning_rate": 3.313257505795317e-06, + "loss": 0.0961, + "step": 910 + }, + { + "epoch": 2.402102496714849, + "grad_norm": 0.720212996006012, + "learning_rate": 3.1783533207481537e-06, + "loss": 0.0866, + "step": 915 + }, + { + "epoch": 2.4152431011826545, + "grad_norm": 0.8982062935829163, + "learning_rate": 3.045926846806277e-06, + "loss": 0.102, + "step": 920 + }, + { + "epoch": 2.42838370565046, + "grad_norm": 0.9239360690116882, + "learning_rate": 2.9160058393707656e-06, + "loss": 0.105, + "step": 925 + }, + { + "epoch": 2.4415243101182655, + "grad_norm": 1.0574581623077393, + "learning_rate": 2.7886175287194142e-06, + "loss": 0.0932, + "step": 930 + }, + { + "epoch": 2.454664914586071, + "grad_norm": 0.7171840071678162, + "learning_rate": 2.6637886142994725e-06, + "loss": 0.0887, + "step": 935 + }, + { + "epoch": 2.4678055190538766, + "grad_norm": 0.923037052154541, + "learning_rate": 2.5415452591317023e-06, + "loss": 0.1041, + "step": 940 + }, + { + "epoch": 2.480946123521682, + "grad_norm": 0.9015092253684998, + "learning_rate": 2.4219130843268362e-06, + "loss": 0.1163, + "step": 945 + }, + { + "epoch": 2.4940867279894876, + "grad_norm": 0.733551561832428, + "learning_rate": 2.304917163715636e-06, + "loss": 0.0941, + "step": 950 + }, + { + "epoch": 2.507227332457293, + "grad_norm": 0.671435534954071, + "learning_rate": 2.1905820185936172e-06, + "loss": 0.0808, + "step": 955 + }, + { + "epoch": 2.5203679369250986, + "grad_norm": 1.0535223484039307, + "learning_rate": 2.0789316125816275e-06, + "loss": 0.1004, + "step": 960 + }, + { + "epoch": 2.533508541392904, + "grad_norm": 0.9018049240112305, + "learning_rate": 1.9699893466032733e-06, + "loss": 0.0928, + "step": 965 + }, + { + "epoch": 2.5466491458607097, + "grad_norm": 0.6735294461250305, + "learning_rate": 1.8637780539803118e-06, + "loss": 0.0956, + "step": 970 + }, + { + "epoch": 2.559789750328515, + "grad_norm": 0.958332896232605, + "learning_rate": 1.760319995646968e-06, + "loss": 0.0881, + "step": 975 + }, + { + "epoch": 2.5729303547963207, + "grad_norm": 0.7761551737785339, + "learning_rate": 1.6596368554842673e-06, + "loss": 0.099, + "step": 980 + }, + { + "epoch": 2.5860709592641262, + "grad_norm": 0.675287663936615, + "learning_rate": 1.5617497357752724e-06, + "loss": 0.0869, + "step": 985 + }, + { + "epoch": 2.5992115637319317, + "grad_norm": 0.8552087545394897, + "learning_rate": 1.4666791527822377e-06, + "loss": 0.0846, + "step": 990 + }, + { + "epoch": 2.6123521681997373, + "grad_norm": 0.7254818677902222, + "learning_rate": 1.374445032446588e-06, + "loss": 0.0761, + "step": 995 + }, + { + "epoch": 2.6254927726675428, + "grad_norm": 0.6427825093269348, + "learning_rate": 1.285066706212612e-06, + "loss": 0.0919, + "step": 1000 + }, + { + "epoch": 2.6386333771353483, + "grad_norm": 0.7550118565559387, + "learning_rate": 1.1985629069757847e-06, + "loss": 0.098, + "step": 1005 + }, + { + "epoch": 2.651773981603154, + "grad_norm": 0.776680052280426, + "learning_rate": 1.1149517651564944e-06, + "loss": 0.1233, + "step": 1010 + }, + { + "epoch": 2.6649145860709593, + "grad_norm": 0.7059184312820435, + "learning_rate": 1.034250804900081e-06, + "loss": 0.0896, + "step": 1015 + }, + { + "epoch": 2.678055190538765, + "grad_norm": 0.9911013841629028, + "learning_rate": 9.56476940403942e-07, + "loss": 0.1067, + "step": 1020 + }, + { + "epoch": 2.6911957950065704, + "grad_norm": 0.7715237736701965, + "learning_rate": 8.816464723724504e-07, + "loss": 0.09, + "step": 1025 + }, + { + "epoch": 2.704336399474376, + "grad_norm": 0.8007025122642517, + "learning_rate": 8.097750846004909e-07, + "loss": 0.0803, + "step": 1030 + }, + { + "epoch": 2.7174770039421814, + "grad_norm": 0.713858425617218, + "learning_rate": 7.40877840686257e-07, + "loss": 0.0874, + "step": 1035 + }, + { + "epoch": 2.730617608409987, + "grad_norm": 0.9154012203216553, + "learning_rate": 6.749691808740777e-07, + "loss": 0.0916, + "step": 1040 + }, + { + "epoch": 2.7437582128777924, + "grad_norm": 0.5980800986289978, + "learning_rate": 6.120629190278554e-07, + "loss": 0.0733, + "step": 1045 + }, + { + "epoch": 2.756898817345598, + "grad_norm": 0.567093551158905, + "learning_rate": 5.521722397358132e-07, + "loss": 0.0799, + "step": 1050 + }, + { + "epoch": 2.7700394218134035, + "grad_norm": 0.7711319327354431, + "learning_rate": 4.953096955471142e-07, + "loss": 0.0858, + "step": 1055 + }, + { + "epoch": 2.783180026281209, + "grad_norm": 0.6735128164291382, + "learning_rate": 4.414872043409757e-07, + "loss": 0.0882, + "step": 1060 + }, + { + "epoch": 2.7963206307490145, + "grad_norm": 0.7795529365539551, + "learning_rate": 3.907160468287707e-07, + "loss": 0.0948, + "step": 1065 + }, + { + "epoch": 2.80946123521682, + "grad_norm": 0.73054039478302, + "learning_rate": 3.4300686418970383e-07, + "loss": 0.0866, + "step": 1070 + }, + { + "epoch": 2.8226018396846255, + "grad_norm": 0.7019670009613037, + "learning_rate": 2.9836965584051303e-07, + "loss": 0.0811, + "step": 1075 + }, + { + "epoch": 2.835742444152431, + "grad_norm": 0.7538115978240967, + "learning_rate": 2.568137773396745e-07, + "loss": 0.0877, + "step": 1080 + }, + { + "epoch": 2.8488830486202366, + "grad_norm": 0.8146035075187683, + "learning_rate": 2.183479384265713e-07, + "loss": 0.0891, + "step": 1085 + }, + { + "epoch": 2.862023653088042, + "grad_norm": 0.7473461627960205, + "learning_rate": 1.8298020119600856e-07, + "loss": 0.0862, + "step": 1090 + }, + { + "epoch": 2.8751642575558476, + "grad_norm": 0.5878650546073914, + "learning_rate": 1.5071797840846336e-07, + "loss": 0.087, + "step": 1095 + }, + { + "epoch": 2.888304862023653, + "grad_norm": 0.6648780703544617, + "learning_rate": 1.215680319364443e-07, + "loss": 0.0805, + "step": 1100 + }, + { + "epoch": 2.9014454664914586, + "grad_norm": 0.7095662951469421, + "learning_rate": 9.553647134726173e-08, + "loss": 0.0852, + "step": 1105 + }, + { + "epoch": 2.914586070959264, + "grad_norm": 0.8825892210006714, + "learning_rate": 7.262875262251389e-08, + "loss": 0.0842, + "step": 1110 + }, + { + "epoch": 2.9277266754270697, + "grad_norm": 0.6977788209915161, + "learning_rate": 5.2849677014566445e-08, + "loss": 0.0798, + "step": 1115 + }, + { + "epoch": 2.940867279894875, + "grad_norm": 0.709928572177887, + "learning_rate": 3.620339004025086e-08, + "loss": 0.0819, + "step": 1120 + }, + { + "epoch": 2.9540078843626807, + "grad_norm": 0.6190377473831177, + "learning_rate": 2.2693380612002767e-08, + "loss": 0.0819, + "step": 1125 + }, + { + "epoch": 2.967148488830486, + "grad_norm": 0.638861894607544, + "learning_rate": 1.2322480306615202e-08, + "loss": 0.079, + "step": 1130 + }, + { + "epoch": 2.9802890932982917, + "grad_norm": 0.9236118793487549, + "learning_rate": 5.092862771765017e-09, + "loss": 0.0941, + "step": 1135 + }, + { + "epoch": 2.9934296977660972, + "grad_norm": 0.9637218713760376, + "learning_rate": 1.006043270438961e-09, + "loss": 0.094, + "step": 1140 + }, + { + "epoch": 3.0, + "step": 1143, + "total_flos": 1.508233453999489e+18, + "train_loss": 0.43455437022609245, + "train_runtime": 1182.767, + "train_samples_per_second": 30.878, + "train_steps_per_second": 0.966 + } + ], + "logging_steps": 5, + "max_steps": 1143, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.508233453999489e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..34f7d445f5170074372b8489b32077d4ef4b7c2e --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/1_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92b59d218d32d2c26a3664f2e1d7d1862828a73a22249a4aa9570885d4183a3 +size 8273 diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e0fe2362fcc0b3a6f09f90c8fcebf20f2226bd00 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 2_128_e3_3e-5 + results: [] +--- + + + +# 2_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0dc9016004b9bb02f0d13f4a222ae223c50f6b9d --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "q_proj", + "k_proj", + "gate_proj", + "v_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed16064a2587a17b1c24eeb212008235323e5759 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3bbaf1dd577ec15980f0291e38f44106b79036c22af4a075cd243cc67faf96d +size 671150064 diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..db78e241295ba54a06aa93a07a702afb9455d83a --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5604276376528486e+18, + "train_loss": 0.4172145333700033, + "train_runtime": 1220.5094, + "train_samples": 13124, + "train_samples_per_second": 32.259, + "train_steps_per_second": 1.01 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/chat_template.jinja b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..db78e241295ba54a06aa93a07a702afb9455d83a --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5604276376528486e+18, + "train_loss": 0.4172145333700033, + "train_runtime": 1220.5094, + "train_samples": 13124, + "train_samples_per_second": 32.259, + "train_steps_per_second": 1.01 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a51c491d1548e00a31ea6421c1bea0581d42a039 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1765 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1233, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01218769043266301, + "grad_norm": 0.673332929611206, + "learning_rate": 1.935483870967742e-06, + "loss": 1.6251, + "step": 5 + }, + { + "epoch": 0.02437538086532602, + "grad_norm": 0.6493095755577087, + "learning_rate": 4.35483870967742e-06, + "loss": 1.6244, + "step": 10 + }, + { + "epoch": 0.03656307129798903, + "grad_norm": 0.5241045951843262, + "learning_rate": 6.774193548387097e-06, + "loss": 1.5828, + "step": 15 + }, + { + "epoch": 0.04875076173065204, + "grad_norm": 0.5314739942550659, + "learning_rate": 9.193548387096775e-06, + "loss": 1.5534, + "step": 20 + }, + { + "epoch": 0.06093845216331505, + "grad_norm": 0.5014873743057251, + "learning_rate": 1.1612903225806451e-05, + "loss": 1.4993, + "step": 25 + }, + { + "epoch": 0.07312614259597806, + "grad_norm": 0.4819537103176117, + "learning_rate": 1.403225806451613e-05, + "loss": 1.5754, + "step": 30 + }, + { + "epoch": 0.08531383302864107, + "grad_norm": 0.47247859835624695, + "learning_rate": 1.6451612903225807e-05, + "loss": 1.4499, + "step": 35 + }, + { + "epoch": 0.09750152346130408, + "grad_norm": 0.45132675766944885, + "learning_rate": 1.8870967741935484e-05, + "loss": 1.5165, + "step": 40 + }, + { + "epoch": 0.10968921389396709, + "grad_norm": 0.4291912913322449, + "learning_rate": 2.1290322580645163e-05, + "loss": 1.4986, + "step": 45 + }, + { + "epoch": 0.1218769043266301, + "grad_norm": 0.5780102610588074, + "learning_rate": 2.370967741935484e-05, + "loss": 1.507, + "step": 50 + }, + { + "epoch": 0.1340645947592931, + "grad_norm": 0.502700924873352, + "learning_rate": 2.6129032258064516e-05, + "loss": 1.4236, + "step": 55 + }, + { + "epoch": 0.14625228519195613, + "grad_norm": 0.47568464279174805, + "learning_rate": 2.8548387096774196e-05, + "loss": 1.3876, + "step": 60 + }, + { + "epoch": 0.15843997562461914, + "grad_norm": 0.5516246557235718, + "learning_rate": 2.9999784073389242e-05, + "loss": 1.4207, + "step": 65 + }, + { + "epoch": 0.17062766605728213, + "grad_norm": 0.5291886329650879, + "learning_rate": 2.999735497041112e-05, + "loss": 1.4426, + "step": 70 + }, + { + "epoch": 0.18281535648994515, + "grad_norm": 0.5884230136871338, + "learning_rate": 2.9992227294732908e-05, + "loss": 1.3353, + "step": 75 + }, + { + "epoch": 0.19500304692260817, + "grad_norm": 0.5660185217857361, + "learning_rate": 2.9984401969011014e-05, + "loss": 1.3635, + "step": 80 + }, + { + "epoch": 0.2071907373552712, + "grad_norm": 0.5848888158798218, + "learning_rate": 2.9973880401307747e-05, + "loss": 1.2763, + "step": 85 + }, + { + "epoch": 0.21937842778793418, + "grad_norm": 0.6404935717582703, + "learning_rate": 2.9960664484837977e-05, + "loss": 1.2728, + "step": 90 + }, + { + "epoch": 0.2315661182205972, + "grad_norm": 0.6230394840240479, + "learning_rate": 2.994475659762846e-05, + "loss": 1.2637, + "step": 95 + }, + { + "epoch": 0.2437538086532602, + "grad_norm": 0.6447322368621826, + "learning_rate": 2.9926159602089955e-05, + "loss": 1.2105, + "step": 100 + }, + { + "epoch": 0.25594149908592323, + "grad_norm": 0.6419833302497864, + "learning_rate": 2.990487684450215e-05, + "loss": 1.1933, + "step": 105 + }, + { + "epoch": 0.2681291895185862, + "grad_norm": 0.7013087868690491, + "learning_rate": 2.988091215441158e-05, + "loss": 1.2009, + "step": 110 + }, + { + "epoch": 0.28031687995124926, + "grad_norm": 0.7826178073883057, + "learning_rate": 2.985426984394251e-05, + "loss": 1.1863, + "step": 115 + }, + { + "epoch": 0.29250457038391225, + "grad_norm": 0.8769330978393555, + "learning_rate": 2.9824954707021058e-05, + "loss": 1.1164, + "step": 120 + }, + { + "epoch": 0.30469226081657524, + "grad_norm": 0.7359930276870728, + "learning_rate": 2.9792972018512597e-05, + "loss": 1.1499, + "step": 125 + }, + { + "epoch": 0.3168799512492383, + "grad_norm": 0.8239122033119202, + "learning_rate": 2.9758327533272567e-05, + "loss": 1.0912, + "step": 130 + }, + { + "epoch": 0.3290676416819013, + "grad_norm": 0.9444642066955566, + "learning_rate": 2.9721027485111025e-05, + "loss": 1.0668, + "step": 135 + }, + { + "epoch": 0.34125533211456427, + "grad_norm": 0.8839324116706848, + "learning_rate": 2.9681078585670912e-05, + "loss": 1.004, + "step": 140 + }, + { + "epoch": 0.3534430225472273, + "grad_norm": 0.8448653817176819, + "learning_rate": 2.9638488023220403e-05, + "loss": 1.0478, + "step": 145 + }, + { + "epoch": 0.3656307129798903, + "grad_norm": 0.8404564261436462, + "learning_rate": 2.9593263461359465e-05, + "loss": 1.0231, + "step": 150 + }, + { + "epoch": 0.3778184034125533, + "grad_norm": 0.8701719045639038, + "learning_rate": 2.9545413037640906e-05, + "loss": 0.9725, + "step": 155 + }, + { + "epoch": 0.39000609384521634, + "grad_norm": 0.9420703649520874, + "learning_rate": 2.9494945362106125e-05, + "loss": 0.945, + "step": 160 + }, + { + "epoch": 0.40219378427787933, + "grad_norm": 0.9437912702560425, + "learning_rate": 2.9441869515735843e-05, + "loss": 0.9206, + "step": 165 + }, + { + "epoch": 0.4143814747105424, + "grad_norm": 0.9759684801101685, + "learning_rate": 2.938619504881612e-05, + "loss": 0.9812, + "step": 170 + }, + { + "epoch": 0.42656916514320536, + "grad_norm": 0.8673017024993896, + "learning_rate": 2.9327931979219895e-05, + "loss": 0.9145, + "step": 175 + }, + { + "epoch": 0.43875685557586835, + "grad_norm": 0.8988730907440186, + "learning_rate": 2.926709079060441e-05, + "loss": 0.9149, + "step": 180 + }, + { + "epoch": 0.4509445460085314, + "grad_norm": 1.06808602809906, + "learning_rate": 2.920368243052481e-05, + "loss": 0.9247, + "step": 185 + }, + { + "epoch": 0.4631322364411944, + "grad_norm": 1.1107875108718872, + "learning_rate": 2.9137718308464297e-05, + "loss": 0.8568, + "step": 190 + }, + { + "epoch": 0.4753199268738574, + "grad_norm": 1.1152838468551636, + "learning_rate": 2.9069210293781113e-05, + "loss": 0.8359, + "step": 195 + }, + { + "epoch": 0.4875076173065204, + "grad_norm": 1.0431712865829468, + "learning_rate": 2.8998170713572838e-05, + "loss": 0.8321, + "step": 200 + }, + { + "epoch": 0.4996953077391834, + "grad_norm": 1.0530294179916382, + "learning_rate": 2.8924612350458293e-05, + "loss": 0.8858, + "step": 205 + }, + { + "epoch": 0.5118829981718465, + "grad_norm": 1.0503332614898682, + "learning_rate": 2.8848548440277458e-05, + "loss": 0.7846, + "step": 210 + }, + { + "epoch": 0.5240706886045094, + "grad_norm": 1.1425493955612183, + "learning_rate": 2.876999266970987e-05, + "loss": 0.8474, + "step": 215 + }, + { + "epoch": 0.5362583790371724, + "grad_norm": 1.0754973888397217, + "learning_rate": 2.8688959173811907e-05, + "loss": 0.7581, + "step": 220 + }, + { + "epoch": 0.5484460694698354, + "grad_norm": 1.010787010192871, + "learning_rate": 2.860546253347334e-05, + "loss": 0.7589, + "step": 225 + }, + { + "epoch": 0.5606337599024985, + "grad_norm": 1.2968950271606445, + "learning_rate": 2.8519517772793735e-05, + "loss": 0.7424, + "step": 230 + }, + { + "epoch": 0.5728214503351615, + "grad_norm": 1.0976183414459229, + "learning_rate": 2.8431140356379032e-05, + "loss": 0.8151, + "step": 235 + }, + { + "epoch": 0.5850091407678245, + "grad_norm": 1.2306115627288818, + "learning_rate": 2.8340346186558928e-05, + "loss": 0.727, + "step": 240 + }, + { + "epoch": 0.5971968312004875, + "grad_norm": 1.0912151336669922, + "learning_rate": 2.8247151600525454e-05, + "loss": 0.7057, + "step": 245 + }, + { + "epoch": 0.6093845216331505, + "grad_norm": 1.0420118570327759, + "learning_rate": 2.8151573367393293e-05, + "loss": 0.7463, + "step": 250 + }, + { + "epoch": 0.6215722120658135, + "grad_norm": 1.2320903539657593, + "learning_rate": 2.8053628685182446e-05, + "loss": 0.7013, + "step": 255 + }, + { + "epoch": 0.6337599024984766, + "grad_norm": 1.380292534828186, + "learning_rate": 2.7953335177723655e-05, + "loss": 0.698, + "step": 260 + }, + { + "epoch": 0.6459475929311396, + "grad_norm": 1.1457571983337402, + "learning_rate": 2.7850710891487227e-05, + "loss": 0.6593, + "step": 265 + }, + { + "epoch": 0.6581352833638026, + "grad_norm": 1.1487387418746948, + "learning_rate": 2.774577429233583e-05, + "loss": 0.6649, + "step": 270 + }, + { + "epoch": 0.6703229737964655, + "grad_norm": 1.0869392156600952, + "learning_rate": 2.7638544262201793e-05, + "loss": 0.6121, + "step": 275 + }, + { + "epoch": 0.6825106642291285, + "grad_norm": 1.2252732515335083, + "learning_rate": 2.7529040095689573e-05, + "loss": 0.6458, + "step": 280 + }, + { + "epoch": 0.6946983546617916, + "grad_norm": 1.624592900276184, + "learning_rate": 2.741728149660392e-05, + "loss": 0.6053, + "step": 285 + }, + { + "epoch": 0.7068860450944546, + "grad_norm": 1.1394425630569458, + "learning_rate": 2.7303288574404484e-05, + "loss": 0.5746, + "step": 290 + }, + { + "epoch": 0.7190737355271176, + "grad_norm": 1.1849138736724854, + "learning_rate": 2.7187081840587356e-05, + "loss": 0.6571, + "step": 295 + }, + { + "epoch": 0.7312614259597806, + "grad_norm": 1.225008249282837, + "learning_rate": 2.7068682204994304e-05, + "loss": 0.5994, + "step": 300 + }, + { + "epoch": 0.7434491163924436, + "grad_norm": 1.235156774520874, + "learning_rate": 2.694811097205034e-05, + "loss": 0.5997, + "step": 305 + }, + { + "epoch": 0.7556368068251066, + "grad_norm": 1.160176396369934, + "learning_rate": 2.682538983693027e-05, + "loss": 0.5984, + "step": 310 + }, + { + "epoch": 0.7678244972577697, + "grad_norm": 1.1850039958953857, + "learning_rate": 2.670054088165492e-05, + "loss": 0.5755, + "step": 315 + }, + { + "epoch": 0.7800121876904327, + "grad_norm": 1.2328600883483887, + "learning_rate": 2.657358657111781e-05, + "loss": 0.5894, + "step": 320 + }, + { + "epoch": 0.7921998781230957, + "grad_norm": 1.4245983362197876, + "learning_rate": 2.644454974904286e-05, + "loss": 0.5506, + "step": 325 + }, + { + "epoch": 0.8043875685557587, + "grad_norm": 1.1019421815872192, + "learning_rate": 2.6313453633874e-05, + "loss": 0.5714, + "step": 330 + }, + { + "epoch": 0.8165752589884216, + "grad_norm": 1.2707737684249878, + "learning_rate": 2.6180321814597293e-05, + "loss": 0.5641, + "step": 335 + }, + { + "epoch": 0.8287629494210847, + "grad_norm": 1.2542502880096436, + "learning_rate": 2.6045178246496433e-05, + "loss": 0.5721, + "step": 340 + }, + { + "epoch": 0.8409506398537477, + "grad_norm": 1.17782461643219, + "learning_rate": 2.590804724684232e-05, + "loss": 0.5579, + "step": 345 + }, + { + "epoch": 0.8531383302864107, + "grad_norm": 1.166025161743164, + "learning_rate": 2.576895349051747e-05, + "loss": 0.484, + "step": 350 + }, + { + "epoch": 0.8653260207190737, + "grad_norm": 1.349998950958252, + "learning_rate": 2.5627922005576115e-05, + "loss": 0.5459, + "step": 355 + }, + { + "epoch": 0.8775137111517367, + "grad_norm": 1.2635490894317627, + "learning_rate": 2.5484978168740744e-05, + "loss": 0.5782, + "step": 360 + }, + { + "epoch": 0.8897014015843998, + "grad_norm": 1.1924906969070435, + "learning_rate": 2.5340147700835898e-05, + "loss": 0.5116, + "step": 365 + }, + { + "epoch": 0.9018890920170628, + "grad_norm": 1.1594135761260986, + "learning_rate": 2.5193456662160043e-05, + "loss": 0.5144, + "step": 370 + }, + { + "epoch": 0.9140767824497258, + "grad_norm": 1.4871833324432373, + "learning_rate": 2.5044931447796388e-05, + "loss": 0.5273, + "step": 375 + }, + { + "epoch": 0.9262644728823888, + "grad_norm": 1.1811950206756592, + "learning_rate": 2.4894598782863434e-05, + "loss": 0.4518, + "step": 380 + }, + { + "epoch": 0.9384521633150518, + "grad_norm": 1.2533081769943237, + "learning_rate": 2.4742485717706142e-05, + "loss": 0.512, + "step": 385 + }, + { + "epoch": 0.9506398537477148, + "grad_norm": 1.1913623809814453, + "learning_rate": 2.4588619623028602e-05, + "loss": 0.4443, + "step": 390 + }, + { + "epoch": 0.9628275441803779, + "grad_norm": 1.3764586448669434, + "learning_rate": 2.443302818496903e-05, + "loss": 0.4665, + "step": 395 + }, + { + "epoch": 0.9750152346130408, + "grad_norm": 1.136022686958313, + "learning_rate": 2.4275739400118017e-05, + "loss": 0.4664, + "step": 400 + }, + { + "epoch": 0.9872029250457038, + "grad_norm": 1.385498285293579, + "learning_rate": 2.4116781570480926e-05, + "loss": 0.4692, + "step": 405 + }, + { + "epoch": 0.9993906154783668, + "grad_norm": 1.2087078094482422, + "learning_rate": 2.395618329838533e-05, + "loss": 0.4179, + "step": 410 + }, + { + "epoch": 1.0097501523461303, + "grad_norm": 1.2528502941131592, + "learning_rate": 2.3793973481334396e-05, + "loss": 0.357, + "step": 415 + }, + { + "epoch": 1.0219378427787935, + "grad_norm": 1.2057477235794067, + "learning_rate": 2.363018130680717e-05, + "loss": 0.3701, + "step": 420 + }, + { + "epoch": 1.0341255332114565, + "grad_norm": 1.338718056678772, + "learning_rate": 2.3464836247006684e-05, + "loss": 0.3896, + "step": 425 + }, + { + "epoch": 1.0463132236441195, + "grad_norm": 1.4432647228240967, + "learning_rate": 2.3297968053556838e-05, + "loss": 0.4252, + "step": 430 + }, + { + "epoch": 1.0585009140767825, + "grad_norm": 1.2510359287261963, + "learning_rate": 2.3129606752148977e-05, + "loss": 0.382, + "step": 435 + }, + { + "epoch": 1.0706886045094455, + "grad_norm": 1.3903076648712158, + "learning_rate": 2.2959782637139173e-05, + "loss": 0.3781, + "step": 440 + }, + { + "epoch": 1.0828762949421085, + "grad_norm": 1.2948212623596191, + "learning_rate": 2.2788526266097188e-05, + "loss": 0.3881, + "step": 445 + }, + { + "epoch": 1.0950639853747715, + "grad_norm": 1.2300318479537964, + "learning_rate": 2.261586845430801e-05, + "loss": 0.3929, + "step": 450 + }, + { + "epoch": 1.1072516758074344, + "grad_norm": 1.1943761110305786, + "learning_rate": 2.2441840269227093e-05, + "loss": 0.3545, + "step": 455 + }, + { + "epoch": 1.1194393662400974, + "grad_norm": 1.1213990449905396, + "learning_rate": 2.2266473024890152e-05, + "loss": 0.3915, + "step": 460 + }, + { + "epoch": 1.1316270566727604, + "grad_norm": 1.3181718587875366, + "learning_rate": 2.2089798276278652e-05, + "loss": 0.3498, + "step": 465 + }, + { + "epoch": 1.1438147471054236, + "grad_norm": 1.20970618724823, + "learning_rate": 2.1911847813641897e-05, + "loss": 0.3275, + "step": 470 + }, + { + "epoch": 1.1560024375380866, + "grad_norm": 1.259089469909668, + "learning_rate": 2.1732653656776802e-05, + "loss": 0.3378, + "step": 475 + }, + { + "epoch": 1.1681901279707496, + "grad_norm": 1.2737544775009155, + "learning_rate": 2.1552248049266365e-05, + "loss": 0.327, + "step": 480 + }, + { + "epoch": 1.1803778184034126, + "grad_norm": 1.2791353464126587, + "learning_rate": 2.1370663452677867e-05, + "loss": 0.347, + "step": 485 + }, + { + "epoch": 1.1925655088360756, + "grad_norm": 1.9227460622787476, + "learning_rate": 2.118793254072184e-05, + "loss": 0.3321, + "step": 490 + }, + { + "epoch": 1.2047531992687386, + "grad_norm": 1.2671159505844116, + "learning_rate": 2.100408819337289e-05, + "loss": 0.3152, + "step": 495 + }, + { + "epoch": 1.2169408897014016, + "grad_norm": 1.2174954414367676, + "learning_rate": 2.0819163490953355e-05, + "loss": 0.3549, + "step": 500 + }, + { + "epoch": 1.2291285801340646, + "grad_norm": 1.250701665878296, + "learning_rate": 2.0633191708180984e-05, + "loss": 0.3077, + "step": 505 + }, + { + "epoch": 1.2413162705667276, + "grad_norm": 1.3017607927322388, + "learning_rate": 2.0446206308181575e-05, + "loss": 0.3285, + "step": 510 + }, + { + "epoch": 1.2535039609993905, + "grad_norm": 1.2456412315368652, + "learning_rate": 2.0258240936467732e-05, + "loss": 0.3373, + "step": 515 + }, + { + "epoch": 1.2656916514320535, + "grad_norm": 1.344040036201477, + "learning_rate": 2.006932941488482e-05, + "loss": 0.3369, + "step": 520 + }, + { + "epoch": 1.2778793418647165, + "grad_norm": 1.2257285118103027, + "learning_rate": 1.987950573552517e-05, + "loss": 0.3193, + "step": 525 + }, + { + "epoch": 1.2900670322973795, + "grad_norm": 1.2541077136993408, + "learning_rate": 1.968880405461166e-05, + "loss": 0.2497, + "step": 530 + }, + { + "epoch": 1.3022547227300427, + "grad_norm": 1.2382172346115112, + "learning_rate": 1.9497258686351762e-05, + "loss": 0.2548, + "step": 535 + }, + { + "epoch": 1.3144424131627057, + "grad_norm": 1.1031371355056763, + "learning_rate": 1.930490409676316e-05, + "loss": 0.2736, + "step": 540 + }, + { + "epoch": 1.3266301035953687, + "grad_norm": 1.3122169971466064, + "learning_rate": 1.911177489747205e-05, + "loss": 0.2913, + "step": 545 + }, + { + "epoch": 1.3388177940280317, + "grad_norm": 1.2930282354354858, + "learning_rate": 1.8917905839485248e-05, + "loss": 0.2654, + "step": 550 + }, + { + "epoch": 1.3510054844606947, + "grad_norm": 1.3781739473342896, + "learning_rate": 1.8723331806937212e-05, + "loss": 0.2542, + "step": 555 + }, + { + "epoch": 1.3631931748933577, + "grad_norm": 1.132137417793274, + "learning_rate": 1.8528087810813108e-05, + "loss": 0.2708, + "step": 560 + }, + { + "epoch": 1.3753808653260207, + "grad_norm": 1.246435284614563, + "learning_rate": 1.833220898264905e-05, + "loss": 0.2719, + "step": 565 + }, + { + "epoch": 1.3875685557586837, + "grad_norm": 1.153387188911438, + "learning_rate": 1.8135730568210655e-05, + "loss": 0.299, + "step": 570 + }, + { + "epoch": 1.3997562461913469, + "grad_norm": 1.3090893030166626, + "learning_rate": 1.793868792115105e-05, + "loss": 0.3186, + "step": 575 + }, + { + "epoch": 1.4119439366240099, + "grad_norm": 1.2646890878677368, + "learning_rate": 1.7741116496649443e-05, + "loss": 0.2869, + "step": 580 + }, + { + "epoch": 1.4241316270566728, + "grad_norm": 1.102391004562378, + "learning_rate": 1.754305184503144e-05, + "loss": 0.3004, + "step": 585 + }, + { + "epoch": 1.4363193174893358, + "grad_norm": 1.2147096395492554, + "learning_rate": 1.7344529605372244e-05, + "loss": 0.2772, + "step": 590 + }, + { + "epoch": 1.4485070079219988, + "grad_norm": 1.1928784847259521, + "learning_rate": 1.71455854990839e-05, + "loss": 0.239, + "step": 595 + }, + { + "epoch": 1.4606946983546618, + "grad_norm": 1.2808955907821655, + "learning_rate": 1.6946255323487667e-05, + "loss": 0.2412, + "step": 600 + }, + { + "epoch": 1.4728823887873248, + "grad_norm": 1.1700308322906494, + "learning_rate": 1.674657494537281e-05, + "loss": 0.229, + "step": 605 + }, + { + "epoch": 1.4850700792199878, + "grad_norm": 1.3523163795471191, + "learning_rate": 1.6546580294542823e-05, + "loss": 0.2411, + "step": 610 + }, + { + "epoch": 1.4972577696526508, + "grad_norm": 1.0596638917922974, + "learning_rate": 1.6346307357350375e-05, + "loss": 0.2047, + "step": 615 + }, + { + "epoch": 1.5094454600853138, + "grad_norm": 1.1007838249206543, + "learning_rate": 1.614579217022201e-05, + "loss": 0.2201, + "step": 620 + }, + { + "epoch": 1.5216331505179768, + "grad_norm": 1.2097558975219727, + "learning_rate": 1.594507081317391e-05, + "loss": 0.213, + "step": 625 + }, + { + "epoch": 1.5338208409506398, + "grad_norm": 1.0720019340515137, + "learning_rate": 1.5744179403319752e-05, + "loss": 0.2579, + "step": 630 + }, + { + "epoch": 1.5460085313833027, + "grad_norm": 1.1256701946258545, + "learning_rate": 1.554315408837195e-05, + "loss": 0.2384, + "step": 635 + }, + { + "epoch": 1.5581962218159657, + "grad_norm": 1.2121301889419556, + "learning_rate": 1.534203104013733e-05, + "loss": 0.2131, + "step": 640 + }, + { + "epoch": 1.5703839122486287, + "grad_norm": 1.011202335357666, + "learning_rate": 1.5140846448008516e-05, + "loss": 0.1991, + "step": 645 + }, + { + "epoch": 1.582571602681292, + "grad_norm": 1.232985496520996, + "learning_rate": 1.4939636512452128e-05, + "loss": 0.2063, + "step": 650 + }, + { + "epoch": 1.594759293113955, + "grad_norm": 1.1507092714309692, + "learning_rate": 1.4738437438494997e-05, + "loss": 0.2147, + "step": 655 + }, + { + "epoch": 1.606946983546618, + "grad_norm": 1.2145538330078125, + "learning_rate": 1.4537285429209551e-05, + "loss": 0.2096, + "step": 660 + }, + { + "epoch": 1.619134673979281, + "grad_norm": 1.0780242681503296, + "learning_rate": 1.4336216679199563e-05, + "loss": 0.1932, + "step": 665 + }, + { + "epoch": 1.631322364411944, + "grad_norm": 1.3777366876602173, + "learning_rate": 1.4135267368087427e-05, + "loss": 0.1868, + "step": 670 + }, + { + "epoch": 1.643510054844607, + "grad_norm": 1.0735384225845337, + "learning_rate": 1.3934473654004096e-05, + "loss": 0.2093, + "step": 675 + }, + { + "epoch": 1.65569774527727, + "grad_norm": 1.2096396684646606, + "learning_rate": 1.3733871667082928e-05, + "loss": 0.2006, + "step": 680 + }, + { + "epoch": 1.667885435709933, + "grad_norm": 1.1222299337387085, + "learning_rate": 1.3533497502958574e-05, + "loss": 0.1757, + "step": 685 + }, + { + "epoch": 1.680073126142596, + "grad_norm": 1.5848546028137207, + "learning_rate": 1.3333387216272e-05, + "loss": 0.2019, + "step": 690 + }, + { + "epoch": 1.692260816575259, + "grad_norm": 1.1570056676864624, + "learning_rate": 1.3133576814182982e-05, + "loss": 0.2229, + "step": 695 + }, + { + "epoch": 1.704448507007922, + "grad_norm": 1.433230996131897, + "learning_rate": 1.29341022498911e-05, + "loss": 0.1976, + "step": 700 + }, + { + "epoch": 1.716636197440585, + "grad_norm": 1.2260408401489258, + "learning_rate": 1.273499941616642e-05, + "loss": 0.1692, + "step": 705 + }, + { + "epoch": 1.728823887873248, + "grad_norm": 1.17520272731781, + "learning_rate": 1.2536304138891069e-05, + "loss": 0.1962, + "step": 710 + }, + { + "epoch": 1.741011578305911, + "grad_norm": 1.2954957485198975, + "learning_rate": 1.2338052170612893e-05, + "loss": 0.2179, + "step": 715 + }, + { + "epoch": 1.753199268738574, + "grad_norm": 1.323201298713684, + "learning_rate": 1.214027918411221e-05, + "loss": 0.1825, + "step": 720 + }, + { + "epoch": 1.765386959171237, + "grad_norm": 1.0657851696014404, + "learning_rate": 1.1943020765983004e-05, + "loss": 0.1641, + "step": 725 + }, + { + "epoch": 1.7775746496039, + "grad_norm": 1.0832653045654297, + "learning_rate": 1.1746312410229595e-05, + "loss": 0.171, + "step": 730 + }, + { + "epoch": 1.789762340036563, + "grad_norm": 0.9748276472091675, + "learning_rate": 1.1550189511879957e-05, + "loss": 0.1719, + "step": 735 + }, + { + "epoch": 1.801950030469226, + "grad_norm": 1.1994144916534424, + "learning_rate": 1.1354687360616853e-05, + "loss": 0.1735, + "step": 740 + }, + { + "epoch": 1.814137720901889, + "grad_norm": 1.1446197032928467, + "learning_rate": 1.1159841134427966e-05, + "loss": 0.1495, + "step": 745 + }, + { + "epoch": 1.826325411334552, + "grad_norm": 1.3504618406295776, + "learning_rate": 1.0965685893276043e-05, + "loss": 0.1594, + "step": 750 + }, + { + "epoch": 1.8385131017672152, + "grad_norm": 1.3091683387756348, + "learning_rate": 1.077225657279036e-05, + "loss": 0.1832, + "step": 755 + }, + { + "epoch": 1.8507007921998782, + "grad_norm": 1.083187222480774, + "learning_rate": 1.0579587977980518e-05, + "loss": 0.1538, + "step": 760 + }, + { + "epoch": 1.8628884826325411, + "grad_norm": 1.1784716844558716, + "learning_rate": 1.0387714776973735e-05, + "loss": 0.1663, + "step": 765 + }, + { + "epoch": 1.8750761730652041, + "grad_norm": 1.0920982360839844, + "learning_rate": 1.0196671494776792e-05, + "loss": 0.1941, + "step": 770 + }, + { + "epoch": 1.8872638634978671, + "grad_norm": 1.0993956327438354, + "learning_rate": 1.0006492507063739e-05, + "loss": 0.1583, + "step": 775 + }, + { + "epoch": 1.8994515539305301, + "grad_norm": 1.127646565437317, + "learning_rate": 9.817212033990413e-06, + "loss": 0.16, + "step": 780 + }, + { + "epoch": 1.9116392443631933, + "grad_norm": 1.1468029022216797, + "learning_rate": 9.62886413403701e-06, + "loss": 0.1443, + "step": 785 + }, + { + "epoch": 1.9238269347958563, + "grad_norm": 1.021328330039978, + "learning_rate": 9.441482697879722e-06, + "loss": 0.1458, + "step": 790 + }, + { + "epoch": 1.9360146252285193, + "grad_norm": 1.354783296585083, + "learning_rate": 9.255101442292546e-06, + "loss": 0.152, + "step": 795 + }, + { + "epoch": 1.9482023156611823, + "grad_norm": 1.111009120941162, + "learning_rate": 9.06975390408041e-06, + "loss": 0.1783, + "step": 800 + }, + { + "epoch": 1.9603900060938453, + "grad_norm": 1.024790644645691, + "learning_rate": 8.885473434044688e-06, + "loss": 0.1483, + "step": 805 + }, + { + "epoch": 1.9725776965265083, + "grad_norm": 1.1695469617843628, + "learning_rate": 8.702293190982147e-06, + "loss": 0.1397, + "step": 810 + }, + { + "epoch": 1.9847653869591713, + "grad_norm": 1.139601469039917, + "learning_rate": 8.520246135718484e-06, + "loss": 0.1504, + "step": 815 + }, + { + "epoch": 1.9969530773918343, + "grad_norm": 0.9508364796638489, + "learning_rate": 8.339365025177473e-06, + "loss": 0.1574, + "step": 820 + }, + { + "epoch": 2.0073126142595976, + "grad_norm": 1.178868293762207, + "learning_rate": 8.15968240648678e-06, + "loss": 0.147, + "step": 825 + }, + { + "epoch": 2.0195003046922606, + "grad_norm": 0.8593213558197021, + "learning_rate": 7.981230611121542e-06, + "loss": 0.1063, + "step": 830 + }, + { + "epoch": 2.031687995124924, + "grad_norm": 0.9431029558181763, + "learning_rate": 7.804041749086772e-06, + "loss": 0.1237, + "step": 835 + }, + { + "epoch": 2.043875685557587, + "grad_norm": 1.1720589399337769, + "learning_rate": 7.628147703139593e-06, + "loss": 0.1149, + "step": 840 + }, + { + "epoch": 2.05606337599025, + "grad_norm": 1.0951436758041382, + "learning_rate": 7.4535801230523315e-06, + "loss": 0.1052, + "step": 845 + }, + { + "epoch": 2.068251066422913, + "grad_norm": 0.9661635756492615, + "learning_rate": 7.280370419917604e-06, + "loss": 0.1025, + "step": 850 + }, + { + "epoch": 2.080438756855576, + "grad_norm": 1.0057296752929688, + "learning_rate": 7.108549760496305e-06, + "loss": 0.1055, + "step": 855 + }, + { + "epoch": 2.092626447288239, + "grad_norm": 1.1186891794204712, + "learning_rate": 6.9381490616095364e-06, + "loss": 0.1308, + "step": 860 + }, + { + "epoch": 2.104814137720902, + "grad_norm": 1.0666916370391846, + "learning_rate": 6.769198984575583e-06, + "loss": 0.1143, + "step": 865 + }, + { + "epoch": 2.117001828153565, + "grad_norm": 1.151049017906189, + "learning_rate": 6.601729929692801e-06, + "loss": 0.1142, + "step": 870 + }, + { + "epoch": 2.129189518586228, + "grad_norm": 0.9494882225990295, + "learning_rate": 6.435772030769476e-06, + "loss": 0.0954, + "step": 875 + }, + { + "epoch": 2.141377209018891, + "grad_norm": 0.8800376057624817, + "learning_rate": 6.271355149701678e-06, + "loss": 0.1177, + "step": 880 + }, + { + "epoch": 2.153564899451554, + "grad_norm": 1.005824089050293, + "learning_rate": 6.108508871100003e-06, + "loss": 0.0897, + "step": 885 + }, + { + "epoch": 2.165752589884217, + "grad_norm": 0.960064709186554, + "learning_rate": 5.947262496966196e-06, + "loss": 0.1084, + "step": 890 + }, + { + "epoch": 2.17794028031688, + "grad_norm": 0.9508167505264282, + "learning_rate": 5.787645041420706e-06, + "loss": 0.101, + "step": 895 + }, + { + "epoch": 2.190127970749543, + "grad_norm": 0.9472805261611938, + "learning_rate": 5.62968522548192e-06, + "loss": 0.1023, + "step": 900 + }, + { + "epoch": 2.202315661182206, + "grad_norm": 0.8249635696411133, + "learning_rate": 5.473411471898227e-06, + "loss": 0.1039, + "step": 905 + }, + { + "epoch": 2.214503351614869, + "grad_norm": 0.8757342100143433, + "learning_rate": 5.3188519000337615e-06, + "loss": 0.0896, + "step": 910 + }, + { + "epoch": 2.226691042047532, + "grad_norm": 0.9892808794975281, + "learning_rate": 5.1660343208086405e-06, + "loss": 0.0896, + "step": 915 + }, + { + "epoch": 2.238878732480195, + "grad_norm": 0.8112608790397644, + "learning_rate": 5.014986231694786e-06, + "loss": 0.0965, + "step": 920 + }, + { + "epoch": 2.251066422912858, + "grad_norm": 0.8900179266929626, + "learning_rate": 4.8657348117681615e-06, + "loss": 0.1166, + "step": 925 + }, + { + "epoch": 2.263254113345521, + "grad_norm": 0.9072511792182922, + "learning_rate": 4.718306916818195e-06, + "loss": 0.095, + "step": 930 + }, + { + "epoch": 2.2754418037781843, + "grad_norm": 1.0779333114624023, + "learning_rate": 4.572729074515491e-06, + "loss": 0.0883, + "step": 935 + }, + { + "epoch": 2.2876294942108473, + "grad_norm": 0.8935828804969788, + "learning_rate": 4.429027479638517e-06, + "loss": 0.0962, + "step": 940 + }, + { + "epoch": 2.2998171846435103, + "grad_norm": 0.8935207724571228, + "learning_rate": 4.287227989360188e-06, + "loss": 0.1013, + "step": 945 + }, + { + "epoch": 2.3120048750761732, + "grad_norm": 0.8225792646408081, + "learning_rate": 4.147356118595249e-06, + "loss": 0.1024, + "step": 950 + }, + { + "epoch": 2.3241925655088362, + "grad_norm": 0.9776331186294556, + "learning_rate": 4.009437035409185e-06, + "loss": 0.1021, + "step": 955 + }, + { + "epoch": 2.3363802559414992, + "grad_norm": 0.8273106813430786, + "learning_rate": 3.8734955564895535e-06, + "loss": 0.0826, + "step": 960 + }, + { + "epoch": 2.348567946374162, + "grad_norm": 0.9962129592895508, + "learning_rate": 3.739556142680606e-06, + "loss": 0.0946, + "step": 965 + }, + { + "epoch": 2.360755636806825, + "grad_norm": 0.7364387512207031, + "learning_rate": 3.607642894581823e-06, + "loss": 0.0793, + "step": 970 + }, + { + "epoch": 2.372943327239488, + "grad_norm": 0.7641225457191467, + "learning_rate": 3.4777795482113693e-06, + "loss": 0.0859, + "step": 975 + }, + { + "epoch": 2.385131017672151, + "grad_norm": 0.8125916719436646, + "learning_rate": 3.349989470735134e-06, + "loss": 0.0956, + "step": 980 + }, + { + "epoch": 2.397318708104814, + "grad_norm": 0.863740086555481, + "learning_rate": 3.2242956562620755e-06, + "loss": 0.1088, + "step": 985 + }, + { + "epoch": 2.409506398537477, + "grad_norm": 1.035588026046753, + "learning_rate": 3.10072072170677e-06, + "loss": 0.0995, + "step": 990 + }, + { + "epoch": 2.42169408897014, + "grad_norm": 0.8617992401123047, + "learning_rate": 2.979286902719815e-06, + "loss": 0.1051, + "step": 995 + }, + { + "epoch": 2.433881779402803, + "grad_norm": 0.6262903809547424, + "learning_rate": 2.8600160496867796e-06, + "loss": 0.0821, + "step": 1000 + }, + { + "epoch": 2.446069469835466, + "grad_norm": 0.7943485975265503, + "learning_rate": 2.7429296237965595e-06, + "loss": 0.0791, + "step": 1005 + }, + { + "epoch": 2.458257160268129, + "grad_norm": 0.6565088629722595, + "learning_rate": 2.6280486931797137e-06, + "loss": 0.0794, + "step": 1010 + }, + { + "epoch": 2.470444850700792, + "grad_norm": 0.871501624584198, + "learning_rate": 2.5153939291175152e-06, + "loss": 0.0766, + "step": 1015 + }, + { + "epoch": 2.482632541133455, + "grad_norm": 0.7739421725273132, + "learning_rate": 2.4049856023224696e-06, + "loss": 0.0901, + "step": 1020 + }, + { + "epoch": 2.494820231566118, + "grad_norm": 0.7205222845077515, + "learning_rate": 2.2968435792908465e-06, + "loss": 0.0849, + "step": 1025 + }, + { + "epoch": 2.507007921998781, + "grad_norm": 0.7185097932815552, + "learning_rate": 2.190987318727968e-06, + "loss": 0.0862, + "step": 1030 + }, + { + "epoch": 2.519195612431444, + "grad_norm": 0.8353011012077332, + "learning_rate": 2.0874358680468953e-06, + "loss": 0.0826, + "step": 1035 + }, + { + "epoch": 2.531383302864107, + "grad_norm": 1.0119293928146362, + "learning_rate": 1.986207859941092e-06, + "loss": 0.1151, + "step": 1040 + }, + { + "epoch": 2.54357099329677, + "grad_norm": 0.734177827835083, + "learning_rate": 1.8873215090317082e-06, + "loss": 0.0843, + "step": 1045 + }, + { + "epoch": 2.555758683729433, + "grad_norm": 0.7243436574935913, + "learning_rate": 1.7907946085901262e-06, + "loss": 0.1017, + "step": 1050 + }, + { + "epoch": 2.567946374162096, + "grad_norm": 0.7187703847885132, + "learning_rate": 1.6966445273362952e-06, + "loss": 0.0906, + "step": 1055 + }, + { + "epoch": 2.580134064594759, + "grad_norm": 1.7860711812973022, + "learning_rate": 1.6048882063134419e-06, + "loss": 0.0942, + "step": 1060 + }, + { + "epoch": 2.5923217550274225, + "grad_norm": 0.8210919499397278, + "learning_rate": 1.5155421558397869e-06, + "loss": 0.087, + "step": 1065 + }, + { + "epoch": 2.6045094454600854, + "grad_norm": 0.8615983724594116, + "learning_rate": 1.4286224525377174e-06, + "loss": 0.0765, + "step": 1070 + }, + { + "epoch": 2.6166971358927484, + "grad_norm": 0.8617960214614868, + "learning_rate": 1.3441447364410226e-06, + "loss": 0.0844, + "step": 1075 + }, + { + "epoch": 2.6288848263254114, + "grad_norm": 0.8989661931991577, + "learning_rate": 1.2621242081806667e-06, + "loss": 0.0975, + "step": 1080 + }, + { + "epoch": 2.6410725167580744, + "grad_norm": 0.6161676049232483, + "learning_rate": 1.1825756262496556e-06, + "loss": 0.0902, + "step": 1085 + }, + { + "epoch": 2.6532602071907374, + "grad_norm": 0.6542766690254211, + "learning_rate": 1.1055133043474385e-06, + "loss": 0.0856, + "step": 1090 + }, + { + "epoch": 2.6654478976234004, + "grad_norm": 0.6571939587593079, + "learning_rate": 1.0309511088043394e-06, + "loss": 0.0792, + "step": 1095 + }, + { + "epoch": 2.6776355880560634, + "grad_norm": 0.6726952195167542, + "learning_rate": 9.589024560865145e-07, + "loss": 0.0768, + "step": 1100 + }, + { + "epoch": 2.6898232784887264, + "grad_norm": 0.855098307132721, + "learning_rate": 8.893803103818304e-07, + "loss": 0.0876, + "step": 1105 + }, + { + "epoch": 2.7020109689213894, + "grad_norm": 0.7534647583961487, + "learning_rate": 8.22397181267125e-07, + "loss": 0.0838, + "step": 1110 + }, + { + "epoch": 2.7141986593540524, + "grad_norm": 0.7975372672080994, + "learning_rate": 7.579651214572941e-07, + "loss": 0.0822, + "step": 1115 + }, + { + "epoch": 2.7263863497867153, + "grad_norm": 0.738946259021759, + "learning_rate": 6.960957246365557e-07, + "loss": 0.0829, + "step": 1120 + }, + { + "epoch": 2.7385740402193783, + "grad_norm": 0.7749128341674805, + "learning_rate": 6.368001233723192e-07, + "loss": 0.0856, + "step": 1125 + }, + { + "epoch": 2.7507617306520413, + "grad_norm": 0.5243741869926453, + "learning_rate": 5.800889871120418e-07, + "loss": 0.0886, + "step": 1130 + }, + { + "epoch": 2.7629494210847043, + "grad_norm": 0.6605753898620605, + "learning_rate": 5.259725202633942e-07, + "loss": 0.0855, + "step": 1135 + }, + { + "epoch": 2.7751371115173673, + "grad_norm": 0.6109983325004578, + "learning_rate": 4.7446046035811373e-07, + "loss": 0.0706, + "step": 1140 + }, + { + "epoch": 2.7873248019500307, + "grad_norm": 0.6517139673233032, + "learning_rate": 4.2556207629988187e-07, + "loss": 0.0737, + "step": 1145 + }, + { + "epoch": 2.7995124923826937, + "grad_norm": 0.7427161335945129, + "learning_rate": 3.792861666964842e-07, + "loss": 0.0995, + "step": 1150 + }, + { + "epoch": 2.8117001828153567, + "grad_norm": 0.7265890836715698, + "learning_rate": 3.3564105827663893e-07, + "loss": 0.0947, + "step": 1155 + }, + { + "epoch": 2.8238878732480197, + "grad_norm": 0.6422916650772095, + "learning_rate": 2.946346043917136e-07, + "loss": 0.0792, + "step": 1160 + }, + { + "epoch": 2.8360755636806827, + "grad_norm": 0.7853269577026367, + "learning_rate": 2.5627418360260225e-07, + "loss": 0.08, + "step": 1165 + }, + { + "epoch": 2.8482632541133457, + "grad_norm": 0.7644032835960388, + "learning_rate": 2.2056669835206878e-07, + "loss": 0.086, + "step": 1170 + }, + { + "epoch": 2.8604509445460087, + "grad_norm": 0.6570528149604797, + "learning_rate": 1.8751857372274416e-07, + "loss": 0.0711, + "step": 1175 + }, + { + "epoch": 2.8726386349786717, + "grad_norm": 0.6165603399276733, + "learning_rate": 1.5713575628101274e-07, + "loss": 0.0812, + "step": 1180 + }, + { + "epoch": 2.8848263254113347, + "grad_norm": 0.7058124542236328, + "learning_rate": 1.294237130070064e-07, + "loss": 0.0872, + "step": 1185 + }, + { + "epoch": 2.8970140158439976, + "grad_norm": 0.754649817943573, + "learning_rate": 1.0438743031090325e-07, + "loss": 0.0786, + "step": 1190 + }, + { + "epoch": 2.9092017062766606, + "grad_norm": 0.5910012722015381, + "learning_rate": 8.203141313568363e-08, + "loss": 0.0763, + "step": 1195 + }, + { + "epoch": 2.9213893967093236, + "grad_norm": 0.715380847454071, + "learning_rate": 6.235968414652848e-08, + "loss": 0.0901, + "step": 1200 + }, + { + "epoch": 2.9335770871419866, + "grad_norm": 0.6660782098770142, + "learning_rate": 4.537578300699008e-08, + "loss": 0.0856, + "step": 1205 + }, + { + "epoch": 2.9457647775746496, + "grad_norm": 0.7636271715164185, + "learning_rate": 3.108276574208646e-08, + "loss": 0.0804, + "step": 1210 + }, + { + "epoch": 2.9579524680073126, + "grad_norm": 0.6278764009475708, + "learning_rate": 1.9483204188402993e-08, + "loss": 0.0713, + "step": 1215 + }, + { + "epoch": 2.9701401584399756, + "grad_norm": 0.6992282867431641, + "learning_rate": 1.0579185531324198e-08, + "loss": 0.0813, + "step": 1220 + }, + { + "epoch": 2.9823278488726386, + "grad_norm": 0.7811171412467957, + "learning_rate": 4.372311929482509e-09, + "loss": 0.0932, + "step": 1225 + }, + { + "epoch": 2.9945155393053016, + "grad_norm": 0.6726181507110596, + "learning_rate": 8.637002264555438e-10, + "loss": 0.0858, + "step": 1230 + }, + { + "epoch": 3.0, + "step": 1233, + "total_flos": 1.5604276376528486e+18, + "train_loss": 0.4172145333700033, + "train_runtime": 1220.5094, + "train_samples_per_second": 32.259, + "train_steps_per_second": 1.01 + } + ], + "logging_steps": 5, + "max_steps": 1233, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5604276376528486e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41b34f2deed99f184971664208ec69d7c24600c1 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/2_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6443466ffe5898434694139f9e85f116b6b9cca8b4fdf6b4f82c35276ffb689 +size 8273 diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f467029c533afe2e50b32ce3d189ca89cdb605f7 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 3_128_e3_3e-5 + results: [] +--- + + + +# 3_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d58a79485e2b4da98b7c39155d59b7be369fd057 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "o_proj", + "up_proj", + "k_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc6a26b8b7937a1b8fe385ab7c5064e1ca66228b --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94ab82437cc21e56615424e305ff2b364b3a6e8e00b57a2057af5dddfb3f7dd5 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d0f5c930c75455aeb257b207764d3858114cd347 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5090307282417746e+18, + "train_loss": 0.4524924519473384, + "train_runtime": 1194.3141, + "train_samples": 12711, + "train_samples_per_second": 31.929, + "train_steps_per_second": 1.0 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/chat_template.jinja b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d0f5c930c75455aeb257b207764d3858114cd347 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5090307282417746e+18, + "train_loss": 0.4524924519473384, + "train_runtime": 1194.3141, + "train_samples": 12711, + "train_samples_per_second": 31.929, + "train_steps_per_second": 1.0 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..33186f12d0df1714eaeb0d241670c7b11847c4dd --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1709 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1194, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012586532410320957, + "grad_norm": 0.6589317917823792, + "learning_rate": 2e-06, + "loss": 1.7276, + "step": 5 + }, + { + "epoch": 0.025173064820641914, + "grad_norm": 0.5580178499221802, + "learning_rate": 4.5e-06, + "loss": 1.7098, + "step": 10 + }, + { + "epoch": 0.03775959723096287, + "grad_norm": 0.5894241333007812, + "learning_rate": 7e-06, + "loss": 1.7393, + "step": 15 + }, + { + "epoch": 0.05034612964128383, + "grad_norm": 0.515548825263977, + "learning_rate": 9.5e-06, + "loss": 1.6521, + "step": 20 + }, + { + "epoch": 0.06293266205160478, + "grad_norm": 0.49391722679138184, + "learning_rate": 1.2e-05, + "loss": 1.6421, + "step": 25 + }, + { + "epoch": 0.07551919446192575, + "grad_norm": 0.48455166816711426, + "learning_rate": 1.45e-05, + "loss": 1.6444, + "step": 30 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 0.4707586169242859, + "learning_rate": 1.7e-05, + "loss": 1.6133, + "step": 35 + }, + { + "epoch": 0.10069225928256766, + "grad_norm": 0.4650135040283203, + "learning_rate": 1.95e-05, + "loss": 1.5772, + "step": 40 + }, + { + "epoch": 0.11327879169288861, + "grad_norm": 0.4993765354156494, + "learning_rate": 2.2e-05, + "loss": 1.508, + "step": 45 + }, + { + "epoch": 0.12586532410320955, + "grad_norm": 0.45921289920806885, + "learning_rate": 2.45e-05, + "loss": 1.5558, + "step": 50 + }, + { + "epoch": 0.13845185651353054, + "grad_norm": 0.4902050197124481, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.5672, + "step": 55 + }, + { + "epoch": 0.1510383889238515, + "grad_norm": 0.5363878607749939, + "learning_rate": 2.95e-05, + "loss": 1.5336, + "step": 60 + }, + { + "epoch": 0.16362492133417245, + "grad_norm": 0.5965032577514648, + "learning_rate": 2.999907901949333e-05, + "loss": 1.4482, + "step": 65 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 0.5876983404159546, + "learning_rate": 2.9995337730012244e-05, + "loss": 1.4275, + "step": 70 + }, + { + "epoch": 0.18879798615481436, + "grad_norm": 0.5950630903244019, + "learning_rate": 2.9988719287563452e-05, + "loss": 1.3954, + "step": 75 + }, + { + "epoch": 0.2013845185651353, + "grad_norm": 0.6117998957633972, + "learning_rate": 2.9979224962026403e-05, + "loss": 1.4015, + "step": 80 + }, + { + "epoch": 0.21397105097545627, + "grad_norm": 0.6703667640686035, + "learning_rate": 2.9966856575075773e-05, + "loss": 1.3671, + "step": 85 + }, + { + "epoch": 0.22655758338577722, + "grad_norm": 0.6466063857078552, + "learning_rate": 2.9951616499831916e-05, + "loss": 1.3155, + "step": 90 + }, + { + "epoch": 0.23914411579609818, + "grad_norm": 0.7461403012275696, + "learning_rate": 2.9933507660405544e-05, + "loss": 1.2783, + "step": 95 + }, + { + "epoch": 0.2517306482064191, + "grad_norm": 0.7432316541671753, + "learning_rate": 2.9912533531336682e-05, + "loss": 1.261, + "step": 100 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 0.8127795457839966, + "learning_rate": 2.9888698136928e-05, + "loss": 1.2727, + "step": 105 + }, + { + "epoch": 0.27690371302706107, + "grad_norm": 0.6656081676483154, + "learning_rate": 2.986200605047268e-05, + "loss": 1.2099, + "step": 110 + }, + { + "epoch": 0.289490245437382, + "grad_norm": 0.699529767036438, + "learning_rate": 2.9832462393376926e-05, + "loss": 1.1654, + "step": 115 + }, + { + "epoch": 0.302076777847703, + "grad_norm": 0.7588850259780884, + "learning_rate": 2.980007283417734e-05, + "loss": 1.1937, + "step": 120 + }, + { + "epoch": 0.3146633102580239, + "grad_norm": 0.7396597266197205, + "learning_rate": 2.9764843587453284e-05, + "loss": 1.109, + "step": 125 + }, + { + "epoch": 0.3272498426683449, + "grad_norm": 0.7869383096694946, + "learning_rate": 2.9726781412634488e-05, + "loss": 1.1296, + "step": 130 + }, + { + "epoch": 0.3398363750786658, + "grad_norm": 0.7190852165222168, + "learning_rate": 2.9685893612704136e-05, + "loss": 1.1082, + "step": 135 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 0.8240865468978882, + "learning_rate": 2.9642188032797633e-05, + "loss": 1.131, + "step": 140 + }, + { + "epoch": 0.36500943989930773, + "grad_norm": 0.9610931277275085, + "learning_rate": 2.959567305869736e-05, + "loss": 1.0961, + "step": 145 + }, + { + "epoch": 0.3775959723096287, + "grad_norm": 0.9877774119377136, + "learning_rate": 2.954635761522369e-05, + "loss": 1.0538, + "step": 150 + }, + { + "epoch": 0.39018250471994964, + "grad_norm": 0.9228981137275696, + "learning_rate": 2.949425116452261e-05, + "loss": 1.0084, + "step": 155 + }, + { + "epoch": 0.4027690371302706, + "grad_norm": 0.9459630250930786, + "learning_rate": 2.943936370425018e-05, + "loss": 1.0222, + "step": 160 + }, + { + "epoch": 0.41535556954059155, + "grad_norm": 0.8710804581642151, + "learning_rate": 2.9381705765654322e-05, + "loss": 0.9928, + "step": 165 + }, + { + "epoch": 0.42794210195091253, + "grad_norm": 1.2564420700073242, + "learning_rate": 2.932128841155416e-05, + "loss": 0.9553, + "step": 170 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 1.009724736213684, + "learning_rate": 2.9258123234217437e-05, + "loss": 0.9295, + "step": 175 + }, + { + "epoch": 0.45311516677155445, + "grad_norm": 1.0067193508148193, + "learning_rate": 2.919222235313626e-05, + "loss": 1.0084, + "step": 180 + }, + { + "epoch": 0.4657016991818754, + "grad_norm": 1.034940242767334, + "learning_rate": 2.912359841270177e-05, + "loss": 0.9812, + "step": 185 + }, + { + "epoch": 0.47828823159219636, + "grad_norm": 1.0376389026641846, + "learning_rate": 2.9052264579778065e-05, + "loss": 0.8958, + "step": 190 + }, + { + "epoch": 0.4908747640025173, + "grad_norm": 0.9745414853096008, + "learning_rate": 2.8978234541175855e-05, + "loss": 0.916, + "step": 195 + }, + { + "epoch": 0.5034612964128382, + "grad_norm": 1.0300168991088867, + "learning_rate": 2.890152250102639e-05, + "loss": 0.9172, + "step": 200 + }, + { + "epoch": 0.5160478288231592, + "grad_norm": 1.135670781135559, + "learning_rate": 2.8822143178056115e-05, + "loss": 1.0121, + "step": 205 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 1.0926744937896729, + "learning_rate": 2.8740111802762587e-05, + "loss": 0.8862, + "step": 210 + }, + { + "epoch": 0.5412208936438011, + "grad_norm": 1.1367942094802856, + "learning_rate": 2.86554441144922e-05, + "loss": 0.7998, + "step": 215 + }, + { + "epoch": 0.5538074260541221, + "grad_norm": 1.071470856666565, + "learning_rate": 2.856815635842029e-05, + "loss": 0.8759, + "step": 220 + }, + { + "epoch": 0.5663939584644431, + "grad_norm": 0.9868183732032776, + "learning_rate": 2.8478265282434157e-05, + "loss": 0.7767, + "step": 225 + }, + { + "epoch": 0.578980490874764, + "grad_norm": 1.1197044849395752, + "learning_rate": 2.8385788133919676e-05, + "loss": 0.8248, + "step": 230 + }, + { + "epoch": 0.5915670232850849, + "grad_norm": 1.1087366342544556, + "learning_rate": 2.8290742656452016e-05, + "loss": 0.7448, + "step": 235 + }, + { + "epoch": 0.604153555695406, + "grad_norm": 1.1718958616256714, + "learning_rate": 2.8193147086391206e-05, + "loss": 0.8333, + "step": 240 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 1.1547728776931763, + "learning_rate": 2.809302014938312e-05, + "loss": 0.8039, + "step": 245 + }, + { + "epoch": 0.6293266205160478, + "grad_norm": 1.3205927610397339, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.7554, + "step": 250 + }, + { + "epoch": 0.6419131529263687, + "grad_norm": 1.1147249937057495, + "learning_rate": 2.78852495018873e-05, + "loss": 0.8704, + "step": 255 + }, + { + "epoch": 0.6544996853366898, + "grad_norm": 1.1258387565612793, + "learning_rate": 2.7777645656319298e-05, + "loss": 0.6494, + "step": 260 + }, + { + "epoch": 0.6670862177470107, + "grad_norm": 1.138344168663025, + "learning_rate": 2.7667590165994617e-05, + "loss": 0.7314, + "step": 265 + }, + { + "epoch": 0.6796727501573316, + "grad_norm": 1.2998223304748535, + "learning_rate": 2.7555104147241967e-05, + "loss": 0.7905, + "step": 270 + }, + { + "epoch": 0.6922592825676526, + "grad_norm": 1.4504525661468506, + "learning_rate": 2.7440209182735144e-05, + "loss": 0.7074, + "step": 275 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 1.257336974143982, + "learning_rate": 2.7322927317351962e-05, + "loss": 0.7036, + "step": 280 + }, + { + "epoch": 0.7174323473882945, + "grad_norm": 1.1925567388534546, + "learning_rate": 2.7203281053944512e-05, + "loss": 0.6533, + "step": 285 + }, + { + "epoch": 0.7300188797986155, + "grad_norm": 1.2786394357681274, + "learning_rate": 2.7081293349021558e-05, + "loss": 0.676, + "step": 290 + }, + { + "epoch": 0.7426054122089364, + "grad_norm": 1.2048102617263794, + "learning_rate": 2.6956987608343837e-05, + "loss": 0.589, + "step": 295 + }, + { + "epoch": 0.7551919446192574, + "grad_norm": 1.2388830184936523, + "learning_rate": 2.683038768243324e-05, + "loss": 0.6531, + "step": 300 + }, + { + "epoch": 0.7677784770295784, + "grad_norm": 1.2565765380859375, + "learning_rate": 2.670151786199659e-05, + "loss": 0.7255, + "step": 305 + }, + { + "epoch": 0.7803650094398993, + "grad_norm": 1.2759621143341064, + "learning_rate": 2.6570402873264996e-05, + "loss": 0.6313, + "step": 310 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 1.233203411102295, + "learning_rate": 2.6437067873249648e-05, + "loss": 0.6071, + "step": 315 + }, + { + "epoch": 0.8055380742605412, + "grad_norm": 1.2961400747299194, + "learning_rate": 2.630153844491491e-05, + "loss": 0.6662, + "step": 320 + }, + { + "epoch": 0.8181246066708622, + "grad_norm": 1.180242657661438, + "learning_rate": 2.6163840592269775e-05, + "loss": 0.6328, + "step": 325 + }, + { + "epoch": 0.8307111390811831, + "grad_norm": 1.341489315032959, + "learning_rate": 2.6024000735378423e-05, + "loss": 0.6421, + "step": 330 + }, + { + "epoch": 0.8432976714915041, + "grad_norm": 1.3508049249649048, + "learning_rate": 2.5882045705291054e-05, + "loss": 0.5442, + "step": 335 + }, + { + "epoch": 0.8558842039018251, + "grad_norm": 1.2327271699905396, + "learning_rate": 2.5738002738895777e-05, + "loss": 0.5584, + "step": 340 + }, + { + "epoch": 0.868470736312146, + "grad_norm": 1.2971057891845703, + "learning_rate": 2.559189947369272e-05, + "loss": 0.5359, + "step": 345 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 1.4613176584243774, + "learning_rate": 2.5443763942491176e-05, + "loss": 0.5234, + "step": 350 + }, + { + "epoch": 0.893643801132788, + "grad_norm": 1.2953211069107056, + "learning_rate": 2.5293624568031008e-05, + "loss": 0.5266, + "step": 355 + }, + { + "epoch": 0.9062303335431089, + "grad_norm": 1.3033990859985352, + "learning_rate": 2.514151015752912e-05, + "loss": 0.5752, + "step": 360 + }, + { + "epoch": 0.9188168659534298, + "grad_norm": 1.4920623302459717, + "learning_rate": 2.4987449897152285e-05, + "loss": 0.5359, + "step": 365 + }, + { + "epoch": 0.9314033983637507, + "grad_norm": 1.2819193601608276, + "learning_rate": 2.4831473346417153e-05, + "loss": 0.4982, + "step": 370 + }, + { + "epoch": 0.9439899307740718, + "grad_norm": 1.3604024648666382, + "learning_rate": 2.467361043251869e-05, + "loss": 0.525, + "step": 375 + }, + { + "epoch": 0.9565764631843927, + "grad_norm": 1.2891862392425537, + "learning_rate": 2.4513891444588046e-05, + "loss": 0.5817, + "step": 380 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 1.4935251474380493, + "learning_rate": 2.4352347027881003e-05, + "loss": 0.5702, + "step": 385 + }, + { + "epoch": 0.9817495280050346, + "grad_norm": 1.5232638120651245, + "learning_rate": 2.4189008177898044e-05, + "loss": 0.4887, + "step": 390 + }, + { + "epoch": 0.9943360604153556, + "grad_norm": 1.2169522047042847, + "learning_rate": 2.402390623443727e-05, + "loss": 0.4958, + "step": 395 + }, + { + "epoch": 1.0050346129641283, + "grad_norm": 1.593054175376892, + "learning_rate": 2.3857072875581247e-05, + "loss": 0.4452, + "step": 400 + }, + { + "epoch": 1.0176211453744493, + "grad_norm": 1.316535234451294, + "learning_rate": 2.368854011161892e-05, + "loss": 0.4294, + "step": 405 + }, + { + "epoch": 1.0302076777847704, + "grad_norm": 1.6156790256500244, + "learning_rate": 2.3518340278903796e-05, + "loss": 0.4177, + "step": 410 + }, + { + "epoch": 1.0427942101950913, + "grad_norm": 1.360829472541809, + "learning_rate": 2.3346506033649618e-05, + "loss": 0.4339, + "step": 415 + }, + { + "epoch": 1.0553807426054123, + "grad_norm": 1.2489149570465088, + "learning_rate": 2.317307034566456e-05, + "loss": 0.391, + "step": 420 + }, + { + "epoch": 1.0679672750157332, + "grad_norm": 1.3759737014770508, + "learning_rate": 2.2998066492025372e-05, + "loss": 0.4058, + "step": 425 + }, + { + "epoch": 1.0805538074260541, + "grad_norm": 1.3407951593399048, + "learning_rate": 2.282152805069247e-05, + "loss": 0.3908, + "step": 430 + }, + { + "epoch": 1.093140339836375, + "grad_norm": 1.1787053346633911, + "learning_rate": 2.264348889406738e-05, + "loss": 0.4293, + "step": 435 + }, + { + "epoch": 1.105726872246696, + "grad_norm": 1.3422763347625732, + "learning_rate": 2.2463983182493595e-05, + "loss": 0.3772, + "step": 440 + }, + { + "epoch": 1.118313404657017, + "grad_norm": 1.2586482763290405, + "learning_rate": 2.228304535770228e-05, + "loss": 0.4515, + "step": 445 + }, + { + "epoch": 1.1308999370673378, + "grad_norm": 1.3044593334197998, + "learning_rate": 2.210071013620393e-05, + "loss": 0.3998, + "step": 450 + }, + { + "epoch": 1.143486469477659, + "grad_norm": 1.2569116353988647, + "learning_rate": 2.1917012502627298e-05, + "loss": 0.3918, + "step": 455 + }, + { + "epoch": 1.15607300188798, + "grad_norm": 1.2767741680145264, + "learning_rate": 2.1731987703006933e-05, + "loss": 0.4003, + "step": 460 + }, + { + "epoch": 1.1686595342983008, + "grad_norm": 1.2452210187911987, + "learning_rate": 2.1545671238020507e-05, + "loss": 0.3356, + "step": 465 + }, + { + "epoch": 1.1812460667086218, + "grad_norm": 1.357266902923584, + "learning_rate": 2.13580988561773e-05, + "loss": 0.368, + "step": 470 + }, + { + "epoch": 1.1938325991189427, + "grad_norm": 1.1994765996932983, + "learning_rate": 2.1169306546959176e-05, + "loss": 0.3534, + "step": 475 + }, + { + "epoch": 1.2064191315292636, + "grad_norm": 1.2828844785690308, + "learning_rate": 2.097933053391524e-05, + "loss": 0.3893, + "step": 480 + }, + { + "epoch": 1.2190056639395848, + "grad_norm": 1.3772032260894775, + "learning_rate": 2.0788207267711672e-05, + "loss": 0.3905, + "step": 485 + }, + { + "epoch": 1.2315921963499057, + "grad_norm": 1.2792960405349731, + "learning_rate": 2.059597341913791e-05, + "loss": 0.3658, + "step": 490 + }, + { + "epoch": 1.2441787287602266, + "grad_norm": 1.2451218366622925, + "learning_rate": 2.0402665872070656e-05, + "loss": 0.2998, + "step": 495 + }, + { + "epoch": 1.2567652611705475, + "grad_norm": 1.3423794507980347, + "learning_rate": 2.0208321716396965e-05, + "loss": 0.2821, + "step": 500 + }, + { + "epoch": 1.2693517935808685, + "grad_norm": 1.267855167388916, + "learning_rate": 2.0012978240897813e-05, + "loss": 0.3404, + "step": 505 + }, + { + "epoch": 1.2819383259911894, + "grad_norm": 1.281585693359375, + "learning_rate": 1.98166729260935e-05, + "loss": 0.331, + "step": 510 + }, + { + "epoch": 1.2945248584015103, + "grad_norm": 1.2135661840438843, + "learning_rate": 1.9619443437052282e-05, + "loss": 0.3324, + "step": 515 + }, + { + "epoch": 1.3071113908118313, + "grad_norm": 1.4117573499679565, + "learning_rate": 1.9421327616163564e-05, + "loss": 0.3246, + "step": 520 + }, + { + "epoch": 1.3196979232221522, + "grad_norm": 1.4633241891860962, + "learning_rate": 1.922236347587711e-05, + "loss": 0.3399, + "step": 525 + }, + { + "epoch": 1.3322844556324733, + "grad_norm": 1.3921277523040771, + "learning_rate": 1.902258919140956e-05, + "loss": 0.2704, + "step": 530 + }, + { + "epoch": 1.3448709880427943, + "grad_norm": 1.4302300214767456, + "learning_rate": 1.882204309341982e-05, + "loss": 0.3216, + "step": 535 + }, + { + "epoch": 1.3574575204531152, + "grad_norm": 1.251111388206482, + "learning_rate": 1.86207636606545e-05, + "loss": 0.2886, + "step": 540 + }, + { + "epoch": 1.3700440528634361, + "grad_norm": 1.251848578453064, + "learning_rate": 1.8418789512565048e-05, + "loss": 0.2988, + "step": 545 + }, + { + "epoch": 1.382630585273757, + "grad_norm": 1.3789279460906982, + "learning_rate": 1.8216159401897812e-05, + "loss": 0.3013, + "step": 550 + }, + { + "epoch": 1.395217117684078, + "grad_norm": 1.5965968370437622, + "learning_rate": 1.801291220725859e-05, + "loss": 0.3046, + "step": 555 + }, + { + "epoch": 1.4078036500943991, + "grad_norm": 1.3434410095214844, + "learning_rate": 1.7809086925652953e-05, + "loss": 0.3104, + "step": 560 + }, + { + "epoch": 1.42039018250472, + "grad_norm": 1.6298123598098755, + "learning_rate": 1.760472266500396e-05, + "loss": 0.2811, + "step": 565 + }, + { + "epoch": 1.432976714915041, + "grad_norm": 1.389017105102539, + "learning_rate": 1.7399858636648443e-05, + "loss": 0.3231, + "step": 570 + }, + { + "epoch": 1.445563247325362, + "grad_norm": 1.1415812969207764, + "learning_rate": 1.719453414781363e-05, + "loss": 0.2919, + "step": 575 + }, + { + "epoch": 1.4581497797356828, + "grad_norm": 1.1277958154678345, + "learning_rate": 1.6988788594075193e-05, + "loss": 0.2907, + "step": 580 + }, + { + "epoch": 1.4707363121460038, + "grad_norm": 1.3546321392059326, + "learning_rate": 1.678266145179846e-05, + "loss": 0.241, + "step": 585 + }, + { + "epoch": 1.4833228445563247, + "grad_norm": 1.2336082458496094, + "learning_rate": 1.6576192270564096e-05, + "loss": 0.2626, + "step": 590 + }, + { + "epoch": 1.4959093769666456, + "grad_norm": 1.1894423961639404, + "learning_rate": 1.6369420665579727e-05, + "loss": 0.2491, + "step": 595 + }, + { + "epoch": 1.5084959093769665, + "grad_norm": 1.4173060655593872, + "learning_rate": 1.6162386310078966e-05, + "loss": 0.298, + "step": 600 + }, + { + "epoch": 1.5210824417872875, + "grad_norm": 1.3801525831222534, + "learning_rate": 1.595512892770933e-05, + "loss": 0.291, + "step": 605 + }, + { + "epoch": 1.5336689741976086, + "grad_norm": 1.2483835220336914, + "learning_rate": 1.5747688284910457e-05, + "loss": 0.2196, + "step": 610 + }, + { + "epoch": 1.5462555066079295, + "grad_norm": 1.50223970413208, + "learning_rate": 1.554010418328415e-05, + "loss": 0.2477, + "step": 615 + }, + { + "epoch": 1.5588420390182505, + "grad_norm": 1.2241572141647339, + "learning_rate": 1.5332416451957603e-05, + "loss": 0.2584, + "step": 620 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 1.3092647790908813, + "learning_rate": 1.5124664939941458e-05, + "loss": 0.242, + "step": 625 + }, + { + "epoch": 1.5840151038388925, + "grad_norm": 1.2375574111938477, + "learning_rate": 1.4916889508483922e-05, + "loss": 0.2756, + "step": 630 + }, + { + "epoch": 1.5966016362492135, + "grad_norm": 1.3810296058654785, + "learning_rate": 1.4709130023422636e-05, + "loss": 0.2465, + "step": 635 + }, + { + "epoch": 1.6091881686595344, + "grad_norm": 1.3656350374221802, + "learning_rate": 1.4501426347535602e-05, + "loss": 0.2509, + "step": 640 + }, + { + "epoch": 1.6217747010698553, + "grad_norm": 1.3518686294555664, + "learning_rate": 1.4293818332892725e-05, + "loss": 0.2641, + "step": 645 + }, + { + "epoch": 1.6343612334801763, + "grad_norm": 1.7594561576843262, + "learning_rate": 1.4086345813209398e-05, + "loss": 0.2352, + "step": 650 + }, + { + "epoch": 1.6469477658904972, + "grad_norm": 1.3476065397262573, + "learning_rate": 1.3879048596203637e-05, + "loss": 0.2344, + "step": 655 + }, + { + "epoch": 1.6595342983008181, + "grad_norm": 1.4118843078613281, + "learning_rate": 1.3671966455958143e-05, + "loss": 0.227, + "step": 660 + }, + { + "epoch": 1.672120830711139, + "grad_norm": 1.1513901948928833, + "learning_rate": 1.3465139125288884e-05, + "loss": 0.2082, + "step": 665 + }, + { + "epoch": 1.68470736312146, + "grad_norm": 1.385729432106018, + "learning_rate": 1.3258606288121545e-05, + "loss": 0.2405, + "step": 670 + }, + { + "epoch": 1.697293895531781, + "grad_norm": 1.222360372543335, + "learning_rate": 1.3052407571877415e-05, + "loss": 0.2433, + "step": 675 + }, + { + "epoch": 1.7098804279421018, + "grad_norm": 1.0658655166625977, + "learning_rate": 1.2846582539870034e-05, + "loss": 0.206, + "step": 680 + }, + { + "epoch": 1.7224669603524227, + "grad_norm": 1.421258807182312, + "learning_rate": 1.2641170683714222e-05, + "loss": 0.224, + "step": 685 + }, + { + "epoch": 1.735053492762744, + "grad_norm": 1.21181321144104, + "learning_rate": 1.2436211415748809e-05, + "loss": 0.2108, + "step": 690 + }, + { + "epoch": 1.7476400251730648, + "grad_norm": 1.143449306488037, + "learning_rate": 1.223174406147461e-05, + "loss": 0.2453, + "step": 695 + }, + { + "epoch": 1.7602265575833858, + "grad_norm": 1.3285601139068604, + "learning_rate": 1.202780785200904e-05, + "loss": 0.2245, + "step": 700 + }, + { + "epoch": 1.7728130899937067, + "grad_norm": 1.3922922611236572, + "learning_rate": 1.1824441916558843e-05, + "loss": 0.203, + "step": 705 + }, + { + "epoch": 1.7853996224040278, + "grad_norm": 1.375600814819336, + "learning_rate": 1.1621685274912381e-05, + "loss": 0.2002, + "step": 710 + }, + { + "epoch": 1.7979861548143488, + "grad_norm": 1.450546145439148, + "learning_rate": 1.1419576829952935e-05, + "loss": 0.2067, + "step": 715 + }, + { + "epoch": 1.8105726872246697, + "grad_norm": 1.189211368560791, + "learning_rate": 1.1218155360194368e-05, + "loss": 0.1951, + "step": 720 + }, + { + "epoch": 1.8231592196349906, + "grad_norm": 1.2211610078811646, + "learning_rate": 1.1017459512340742e-05, + "loss": 0.1656, + "step": 725 + }, + { + "epoch": 1.8357457520453115, + "grad_norm": 1.1490590572357178, + "learning_rate": 1.0817527793871143e-05, + "loss": 0.1802, + "step": 730 + }, + { + "epoch": 1.8483322844556325, + "grad_norm": 1.371356725692749, + "learning_rate": 1.0618398565651315e-05, + "loss": 0.1669, + "step": 735 + }, + { + "epoch": 1.8609188168659534, + "grad_norm": 1.3361284732818604, + "learning_rate": 1.0420110034573304e-05, + "loss": 0.1883, + "step": 740 + }, + { + "epoch": 1.8735053492762743, + "grad_norm": 1.1178481578826904, + "learning_rate": 1.0222700246224735e-05, + "loss": 0.1536, + "step": 745 + }, + { + "epoch": 1.8860918816865953, + "grad_norm": 1.4041897058486938, + "learning_rate": 1.0026207077589017e-05, + "loss": 0.1859, + "step": 750 + }, + { + "epoch": 1.8986784140969162, + "grad_norm": 1.3695752620697021, + "learning_rate": 9.83066822977789e-06, + "loss": 0.1737, + "step": 755 + }, + { + "epoch": 1.911264946507237, + "grad_norm": 1.0110995769500732, + "learning_rate": 9.636121220797708e-06, + "loss": 0.1805, + "step": 760 + }, + { + "epoch": 1.9238514789175583, + "grad_norm": 1.3886349201202393, + "learning_rate": 9.44260337835088e-06, + "loss": 0.1718, + "step": 765 + }, + { + "epoch": 1.9364380113278792, + "grad_norm": 1.2027732133865356, + "learning_rate": 9.250151832673785e-06, + "loss": 0.1772, + "step": 770 + }, + { + "epoch": 1.9490245437382, + "grad_norm": 1.0328295230865479, + "learning_rate": 9.058803509412647e-06, + "loss": 0.168, + "step": 775 + }, + { + "epoch": 1.961611076148521, + "grad_norm": 1.1336723566055298, + "learning_rate": 8.868595122538569e-06, + "loss": 0.1833, + "step": 780 + }, + { + "epoch": 1.9741976085588422, + "grad_norm": 1.1561397314071655, + "learning_rate": 8.679563167303242e-06, + "loss": 0.1779, + "step": 785 + }, + { + "epoch": 1.9867841409691631, + "grad_norm": 1.344976782798767, + "learning_rate": 8.491743913236629e-06, + "loss": 0.175, + "step": 790 + }, + { + "epoch": 1.999370673379484, + "grad_norm": 1.115304946899414, + "learning_rate": 8.305173397187912e-06, + "loss": 0.1514, + "step": 795 + }, + { + "epoch": 2.0100692259282567, + "grad_norm": 1.1556038856506348, + "learning_rate": 8.119887416411119e-06, + "loss": 0.135, + "step": 800 + }, + { + "epoch": 2.0226557583385776, + "grad_norm": 1.3383346796035767, + "learning_rate": 7.935921521696703e-06, + "loss": 0.1598, + "step": 805 + }, + { + "epoch": 2.0352422907488985, + "grad_norm": 1.2285635471343994, + "learning_rate": 7.753311010550423e-06, + "loss": 0.1277, + "step": 810 + }, + { + "epoch": 2.0478288231592194, + "grad_norm": 1.0143258571624756, + "learning_rate": 7.572090920420831e-06, + "loss": 0.1146, + "step": 815 + }, + { + "epoch": 2.060415355569541, + "grad_norm": 1.1648786067962646, + "learning_rate": 7.392296021976615e-06, + "loss": 0.145, + "step": 820 + }, + { + "epoch": 2.0730018879798617, + "grad_norm": 0.9765985608100891, + "learning_rate": 7.21396081243517e-06, + "loss": 0.1119, + "step": 825 + }, + { + "epoch": 2.0855884203901827, + "grad_norm": 1.1732720136642456, + "learning_rate": 7.037119508943623e-06, + "loss": 0.1206, + "step": 830 + }, + { + "epoch": 2.0981749528005036, + "grad_norm": 0.9976708292961121, + "learning_rate": 6.86180604201361e-06, + "loss": 0.1466, + "step": 835 + }, + { + "epoch": 2.1107614852108245, + "grad_norm": 1.1881070137023926, + "learning_rate": 6.688054049011e-06, + "loss": 0.1192, + "step": 840 + }, + { + "epoch": 2.1233480176211454, + "grad_norm": 1.1547623872756958, + "learning_rate": 6.515896867701924e-06, + "loss": 0.1136, + "step": 845 + }, + { + "epoch": 2.1359345500314664, + "grad_norm": 1.0519601106643677, + "learning_rate": 6.345367529856254e-06, + "loss": 0.1353, + "step": 850 + }, + { + "epoch": 2.1485210824417873, + "grad_norm": 1.1143457889556885, + "learning_rate": 6.176498754909823e-06, + "loss": 0.1148, + "step": 855 + }, + { + "epoch": 2.1611076148521082, + "grad_norm": 1.0581791400909424, + "learning_rate": 6.009322943686515e-06, + "loss": 0.114, + "step": 860 + }, + { + "epoch": 2.173694147262429, + "grad_norm": 0.8599725365638733, + "learning_rate": 5.843872172181554e-06, + "loss": 0.0902, + "step": 865 + }, + { + "epoch": 2.18628067967275, + "grad_norm": 1.1006416082382202, + "learning_rate": 5.680178185407073e-06, + "loss": 0.1067, + "step": 870 + }, + { + "epoch": 2.198867212083071, + "grad_norm": 1.1201493740081787, + "learning_rate": 5.518272391301223e-06, + "loss": 0.1221, + "step": 875 + }, + { + "epoch": 2.211453744493392, + "grad_norm": 1.0375794172286987, + "learning_rate": 5.3581858547019095e-06, + "loss": 0.1284, + "step": 880 + }, + { + "epoch": 2.224040276903713, + "grad_norm": 0.971002995967865, + "learning_rate": 5.199949291386409e-06, + "loss": 0.1176, + "step": 885 + }, + { + "epoch": 2.236626809314034, + "grad_norm": 1.062329888343811, + "learning_rate": 5.043593062177925e-06, + "loss": 0.1063, + "step": 890 + }, + { + "epoch": 2.2492133417243547, + "grad_norm": 0.7883113026618958, + "learning_rate": 4.889147167120268e-06, + "loss": 0.0934, + "step": 895 + }, + { + "epoch": 2.2617998741346756, + "grad_norm": 1.1446378231048584, + "learning_rate": 4.736641239721761e-06, + "loss": 0.1167, + "step": 900 + }, + { + "epoch": 2.274386406544997, + "grad_norm": 0.9391647577285767, + "learning_rate": 4.586104541269444e-06, + "loss": 0.1092, + "step": 905 + }, + { + "epoch": 2.286972938955318, + "grad_norm": 0.8940252065658569, + "learning_rate": 4.4375659552147245e-06, + "loss": 0.1156, + "step": 910 + }, + { + "epoch": 2.299559471365639, + "grad_norm": 1.1003035306930542, + "learning_rate": 4.2910539816315166e-06, + "loss": 0.1252, + "step": 915 + }, + { + "epoch": 2.31214600377596, + "grad_norm": 0.8707892298698425, + "learning_rate": 4.146596731747938e-06, + "loss": 0.1117, + "step": 920 + }, + { + "epoch": 2.3247325361862807, + "grad_norm": 0.9858210682868958, + "learning_rate": 4.004221922552608e-06, + "loss": 0.0911, + "step": 925 + }, + { + "epoch": 2.3373190685966017, + "grad_norm": 1.1481062173843384, + "learning_rate": 3.863956871476593e-06, + "loss": 0.0975, + "step": 930 + }, + { + "epoch": 2.3499056010069226, + "grad_norm": 0.9825985431671143, + "learning_rate": 3.7258284911520273e-06, + "loss": 0.1154, + "step": 935 + }, + { + "epoch": 2.3624921334172435, + "grad_norm": 0.7458732724189758, + "learning_rate": 3.5898632842483746e-06, + "loss": 0.1002, + "step": 940 + }, + { + "epoch": 2.3750786658275644, + "grad_norm": 0.809174120426178, + "learning_rate": 3.4560873383873713e-06, + "loss": 0.1038, + "step": 945 + }, + { + "epoch": 2.3876651982378854, + "grad_norm": 0.8212640881538391, + "learning_rate": 3.3245263211376e-06, + "loss": 0.0975, + "step": 950 + }, + { + "epoch": 2.4002517306482063, + "grad_norm": 1.0393940210342407, + "learning_rate": 3.1952054750896677e-06, + "loss": 0.0963, + "step": 955 + }, + { + "epoch": 2.412838263058527, + "grad_norm": 0.9464735388755798, + "learning_rate": 3.0681496130128903e-06, + "loss": 0.1017, + "step": 960 + }, + { + "epoch": 2.425424795468848, + "grad_norm": 0.8546525239944458, + "learning_rate": 2.9433831130944926e-06, + "loss": 0.1152, + "step": 965 + }, + { + "epoch": 2.4380113278791695, + "grad_norm": 0.8620587587356567, + "learning_rate": 2.8209299142621527e-06, + "loss": 0.1058, + "step": 970 + }, + { + "epoch": 2.4505978602894904, + "grad_norm": 0.8297479748725891, + "learning_rate": 2.7008135115908657e-06, + "loss": 0.0994, + "step": 975 + }, + { + "epoch": 2.4631843926998114, + "grad_norm": 0.9620172381401062, + "learning_rate": 2.583056951794922e-06, + "loss": 0.0945, + "step": 980 + }, + { + "epoch": 2.4757709251101323, + "grad_norm": 0.9340348243713379, + "learning_rate": 2.467682828805956e-06, + "loss": 0.0877, + "step": 985 + }, + { + "epoch": 2.4883574575204532, + "grad_norm": 0.9892323613166809, + "learning_rate": 2.3547132794378395e-06, + "loss": 0.1078, + "step": 990 + }, + { + "epoch": 2.500943989930774, + "grad_norm": 0.8138073086738586, + "learning_rate": 2.244169979139315e-06, + "loss": 0.0981, + "step": 995 + }, + { + "epoch": 2.513530522341095, + "grad_norm": 1.0038889646530151, + "learning_rate": 2.1360741378351074e-06, + "loss": 0.0981, + "step": 1000 + }, + { + "epoch": 2.526117054751416, + "grad_norm": 0.7669501304626465, + "learning_rate": 2.030446495856392e-06, + "loss": 0.0814, + "step": 1005 + }, + { + "epoch": 2.538703587161737, + "grad_norm": 0.786217451095581, + "learning_rate": 1.927307319961345e-06, + "loss": 0.0918, + "step": 1010 + }, + { + "epoch": 2.551290119572058, + "grad_norm": 0.8327719569206238, + "learning_rate": 1.8266763994465702e-06, + "loss": 0.1008, + "step": 1015 + }, + { + "epoch": 2.563876651982379, + "grad_norm": 0.8695824146270752, + "learning_rate": 1.7285730423501328e-06, + "loss": 0.0993, + "step": 1020 + }, + { + "epoch": 2.5764631843926997, + "grad_norm": 0.9312455654144287, + "learning_rate": 1.6330160717469133e-06, + "loss": 0.0943, + "step": 1025 + }, + { + "epoch": 2.5890497168030207, + "grad_norm": 0.7334368824958801, + "learning_rate": 1.5400238221370416e-06, + "loss": 0.084, + "step": 1030 + }, + { + "epoch": 2.6016362492133416, + "grad_norm": 1.0148022174835205, + "learning_rate": 1.4496141359280557e-06, + "loss": 0.0763, + "step": 1035 + }, + { + "epoch": 2.6142227816236625, + "grad_norm": 0.7404589056968689, + "learning_rate": 1.3618043600114804e-06, + "loss": 0.0797, + "step": 1040 + }, + { + "epoch": 2.6268093140339834, + "grad_norm": 0.7286416292190552, + "learning_rate": 1.2766113424344816e-06, + "loss": 0.0833, + "step": 1045 + }, + { + "epoch": 2.6393958464443044, + "grad_norm": 0.6316741108894348, + "learning_rate": 1.1940514291672382e-06, + "loss": 0.0894, + "step": 1050 + }, + { + "epoch": 2.6519823788546253, + "grad_norm": 0.7137883305549622, + "learning_rate": 1.114140460966645e-06, + "loss": 0.0768, + "step": 1055 + }, + { + "epoch": 2.6645689112649467, + "grad_norm": 0.651564359664917, + "learning_rate": 1.036893770336938e-06, + "loss": 0.0796, + "step": 1060 + }, + { + "epoch": 2.6771554436752676, + "grad_norm": 1.0661816596984863, + "learning_rate": 9.623261785878584e-07, + "loss": 0.0913, + "step": 1065 + }, + { + "epoch": 2.6897419760855885, + "grad_norm": 0.7153500914573669, + "learning_rate": 8.904519929908905e-07, + "loss": 0.0933, + "step": 1070 + }, + { + "epoch": 2.7023285084959094, + "grad_norm": 0.6933944225311279, + "learning_rate": 8.212850040341274e-07, + "loss": 0.0892, + "step": 1075 + }, + { + "epoch": 2.7149150409062304, + "grad_norm": 0.8534197211265564, + "learning_rate": 7.548384827762911e-07, + "loss": 0.0766, + "step": 1080 + }, + { + "epoch": 2.7275015733165513, + "grad_norm": 0.7704508900642395, + "learning_rate": 6.911251783004091e-07, + "loss": 0.0934, + "step": 1085 + }, + { + "epoch": 2.7400881057268722, + "grad_norm": 0.7494409680366516, + "learning_rate": 6.301573152676665e-07, + "loss": 0.0824, + "step": 1090 + }, + { + "epoch": 2.752674638137193, + "grad_norm": 0.768134355545044, + "learning_rate": 5.71946591571852e-07, + "loss": 0.0908, + "step": 1095 + }, + { + "epoch": 2.765261170547514, + "grad_norm": 0.8560143113136292, + "learning_rate": 5.165041760948796e-07, + "loss": 0.0952, + "step": 1100 + }, + { + "epoch": 2.777847702957835, + "grad_norm": 0.8723145723342896, + "learning_rate": 4.6384070656383227e-07, + "loss": 0.0776, + "step": 1105 + }, + { + "epoch": 2.790434235368156, + "grad_norm": 0.8055382966995239, + "learning_rate": 4.1396628750988485e-07, + "loss": 0.0901, + "step": 1110 + }, + { + "epoch": 2.8030207677784773, + "grad_norm": 0.804349422454834, + "learning_rate": 3.6689048832957093e-07, + "loss": 0.1047, + "step": 1115 + }, + { + "epoch": 2.8156073001887982, + "grad_norm": 0.7426791191101074, + "learning_rate": 3.2262234144868117e-07, + "loss": 0.0941, + "step": 1120 + }, + { + "epoch": 2.828193832599119, + "grad_norm": 0.7353984713554382, + "learning_rate": 2.811703405892296e-07, + "loss": 0.0909, + "step": 1125 + }, + { + "epoch": 2.84078036500944, + "grad_norm": 0.6610361337661743, + "learning_rate": 2.425424391397607e-07, + "loss": 0.0767, + "step": 1130 + }, + { + "epoch": 2.853366897419761, + "grad_norm": 0.73736572265625, + "learning_rate": 2.0674604862932656e-07, + "loss": 0.0978, + "step": 1135 + }, + { + "epoch": 2.865953429830082, + "grad_norm": 0.7291079163551331, + "learning_rate": 1.7378803730545122e-07, + "loss": 0.0919, + "step": 1140 + }, + { + "epoch": 2.878539962240403, + "grad_norm": 1.0240551233291626, + "learning_rate": 1.436747288163115e-07, + "loss": 0.099, + "step": 1145 + }, + { + "epoch": 2.891126494650724, + "grad_norm": 0.8893821239471436, + "learning_rate": 1.1641190099741905e-07, + "loss": 0.0849, + "step": 1150 + }, + { + "epoch": 2.9037130270610447, + "grad_norm": 0.7920015454292297, + "learning_rate": 9.200478476303565e-08, + "loss": 0.0771, + "step": 1155 + }, + { + "epoch": 2.9162995594713657, + "grad_norm": 0.7665479779243469, + "learning_rate": 7.045806310251257e-08, + "loss": 0.0964, + "step": 1160 + }, + { + "epoch": 2.9288860918816866, + "grad_norm": 0.79188472032547, + "learning_rate": 5.177587018176777e-08, + "loss": 0.096, + "step": 1165 + }, + { + "epoch": 2.9414726242920075, + "grad_norm": 0.7572119832038879, + "learning_rate": 3.596179055006365e-08, + "loss": 0.0904, + "step": 1170 + }, + { + "epoch": 2.9540591567023284, + "grad_norm": 0.8481308221817017, + "learning_rate": 2.301885845224061e-08, + "loss": 0.0845, + "step": 1175 + }, + { + "epoch": 2.9666456891126494, + "grad_norm": 0.8993321061134338, + "learning_rate": 1.294955724653768e-08, + "loss": 0.099, + "step": 1180 + }, + { + "epoch": 2.9792322215229703, + "grad_norm": 0.8289469480514526, + "learning_rate": 5.7558189281120555e-09, + "loss": 0.0831, + "step": 1185 + }, + { + "epoch": 2.991818753933291, + "grad_norm": 0.7895488142967224, + "learning_rate": 1.4390237583405963e-09, + "loss": 0.0856, + "step": 1190 + }, + { + "epoch": 3.0, + "step": 1194, + "total_flos": 1.5090307282417746e+18, + "train_loss": 0.4524924519473384, + "train_runtime": 1194.3141, + "train_samples_per_second": 31.929, + "train_steps_per_second": 1.0 + } + ], + "logging_steps": 5, + "max_steps": 1194, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5090307282417746e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..643b5a3303c57e2a4e0f5272ab7ee851134a13fc --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/3_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0d26a535bfdad409fac4a92a59b0fc332ed4dbf182f3cb27c208cc8e6b37b4 +size 8273 diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec1d1511b8f1ad7dfe7065fb55b28b5463aa5d08 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 4_128_e3_3e-5 + results: [] +--- + + + +# 4_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbedfbbe9516665e8c3f792c882205c9f9442c80 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "gate_proj", + "v_proj", + "o_proj", + "down_proj", + "k_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e675ab15168851299772d47b0e812d8e75578872 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ceeed6507ae1e7bd4f043f6538e6ece9c432c07069aaf60e674354fe6366b9 +size 671150064 diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6bdf43a6414307d01f85eb02907da10e23471a9f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4414949418081976e+18, + "train_loss": 0.43230534175820384, + "train_runtime": 1139.8627, + "train_samples": 12014, + "train_samples_per_second": 31.62, + "train_steps_per_second": 0.99 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/chat_template.jinja b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6bdf43a6414307d01f85eb02907da10e23471a9f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4414949418081976e+18, + "train_loss": 0.43230534175820384, + "train_runtime": 1139.8627, + "train_samples": 12014, + "train_samples_per_second": 31.62, + "train_steps_per_second": 0.99 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7f3b48e5be38ef2d891846860a1a11b188df2893 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1618 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1128, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013315579227696404, + "grad_norm": 0.6325865387916565, + "learning_rate": 2.1052631578947366e-06, + "loss": 1.6479, + "step": 5 + }, + { + "epoch": 0.02663115845539281, + "grad_norm": 0.5910339951515198, + "learning_rate": 4.736842105263158e-06, + "loss": 1.6599, + "step": 10 + }, + { + "epoch": 0.03994673768308921, + "grad_norm": 0.6727888584136963, + "learning_rate": 7.3684210526315784e-06, + "loss": 1.6471, + "step": 15 + }, + { + "epoch": 0.05326231691078562, + "grad_norm": 0.4895073473453522, + "learning_rate": 9.999999999999999e-06, + "loss": 1.6361, + "step": 20 + }, + { + "epoch": 0.06657789613848203, + "grad_norm": 0.48855116963386536, + "learning_rate": 1.263157894736842e-05, + "loss": 1.6264, + "step": 25 + }, + { + "epoch": 0.07989347536617843, + "grad_norm": 0.4614351689815521, + "learning_rate": 1.5263157894736842e-05, + "loss": 1.571, + "step": 30 + }, + { + "epoch": 0.09320905459387484, + "grad_norm": 0.4768711030483246, + "learning_rate": 1.7894736842105264e-05, + "loss": 1.5596, + "step": 35 + }, + { + "epoch": 0.10652463382157124, + "grad_norm": 0.45624077320098877, + "learning_rate": 2.0526315789473685e-05, + "loss": 1.4905, + "step": 40 + }, + { + "epoch": 0.11984021304926765, + "grad_norm": 0.47357749938964844, + "learning_rate": 2.3157894736842103e-05, + "loss": 1.5293, + "step": 45 + }, + { + "epoch": 0.13315579227696406, + "grad_norm": 0.46852031350135803, + "learning_rate": 2.578947368421053e-05, + "loss": 1.4843, + "step": 50 + }, + { + "epoch": 0.14647137150466044, + "grad_norm": 0.5218312740325928, + "learning_rate": 2.8421052631578946e-05, + "loss": 1.4101, + "step": 55 + }, + { + "epoch": 0.15978695073235685, + "grad_norm": 0.5118663311004639, + "learning_rate": 2.9999741868614275e-05, + "loss": 1.4365, + "step": 60 + }, + { + "epoch": 0.17310252996005326, + "grad_norm": 0.5035812258720398, + "learning_rate": 2.999683799255387e-05, + "loss": 1.3981, + "step": 65 + }, + { + "epoch": 0.18641810918774968, + "grad_norm": 0.5110086798667908, + "learning_rate": 2.9990708202925038e-05, + "loss": 1.4083, + "step": 70 + }, + { + "epoch": 0.19973368841544606, + "grad_norm": 0.579852283000946, + "learning_rate": 2.9981353818283835e-05, + "loss": 1.3457, + "step": 75 + }, + { + "epoch": 0.21304926764314247, + "grad_norm": 0.6390470266342163, + "learning_rate": 2.996877685081685e-05, + "loss": 1.316, + "step": 80 + }, + { + "epoch": 0.22636484687083888, + "grad_norm": 0.584377110004425, + "learning_rate": 2.995298000590839e-05, + "loss": 1.3593, + "step": 85 + }, + { + "epoch": 0.2396804260985353, + "grad_norm": 0.6537099480628967, + "learning_rate": 2.99339666815585e-05, + "loss": 1.2776, + "step": 90 + }, + { + "epoch": 0.2529960053262317, + "grad_norm": 0.722669780254364, + "learning_rate": 2.9911740967652065e-05, + "loss": 1.243, + "step": 95 + }, + { + "epoch": 0.2663115845539281, + "grad_norm": 0.7104046940803528, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.2018, + "step": 100 + }, + { + "epoch": 0.2796271637816245, + "grad_norm": 0.6718395352363586, + "learning_rate": 2.9857672184706038e-05, + "loss": 1.1281, + "step": 105 + }, + { + "epoch": 0.2929427430093209, + "grad_norm": 0.7997251152992249, + "learning_rate": 2.9825840746199534e-05, + "loss": 1.126, + "step": 110 + }, + { + "epoch": 0.3062583222370173, + "grad_norm": 0.7351905107498169, + "learning_rate": 2.9790820176700872e-05, + "loss": 1.1581, + "step": 115 + }, + { + "epoch": 0.3195739014647137, + "grad_norm": 0.8547983765602112, + "learning_rate": 2.975261800935339e-05, + "loss": 1.0911, + "step": 120 + }, + { + "epoch": 0.33288948069241014, + "grad_norm": 0.7865622043609619, + "learning_rate": 2.971124246168202e-05, + "loss": 1.1067, + "step": 125 + }, + { + "epoch": 0.34620505992010653, + "grad_norm": 0.9200232625007629, + "learning_rate": 2.9666702433825614e-05, + "loss": 1.1212, + "step": 130 + }, + { + "epoch": 0.3595206391478029, + "grad_norm": 0.7528833746910095, + "learning_rate": 2.9619007506622506e-05, + "loss": 1.079, + "step": 135 + }, + { + "epoch": 0.37283621837549935, + "grad_norm": 0.9026903510093689, + "learning_rate": 2.956816793954958e-05, + "loss": 1.0512, + "step": 140 + }, + { + "epoch": 0.38615179760319573, + "grad_norm": 0.9324937462806702, + "learning_rate": 2.951419466851542e-05, + "loss": 1.0182, + "step": 145 + }, + { + "epoch": 0.3994673768308921, + "grad_norm": 0.9434838891029358, + "learning_rate": 2.9457099303507904e-05, + "loss": 0.9729, + "step": 150 + }, + { + "epoch": 0.41278295605858856, + "grad_norm": 0.9802708625793457, + "learning_rate": 2.939689412609684e-05, + "loss": 0.9977, + "step": 155 + }, + { + "epoch": 0.42609853528628494, + "grad_norm": 1.1556743383407593, + "learning_rate": 2.9333592086792113e-05, + "loss": 0.9598, + "step": 160 + }, + { + "epoch": 0.4394141145139814, + "grad_norm": 1.0488144159317017, + "learning_rate": 2.9267206802257952e-05, + "loss": 0.9468, + "step": 165 + }, + { + "epoch": 0.45272969374167776, + "grad_norm": 1.1494991779327393, + "learning_rate": 2.919775255238392e-05, + "loss": 0.9199, + "step": 170 + }, + { + "epoch": 0.46604527296937415, + "grad_norm": 1.0939358472824097, + "learning_rate": 2.9125244277213176e-05, + "loss": 0.8905, + "step": 175 + }, + { + "epoch": 0.4793608521970706, + "grad_norm": 0.9866417646408081, + "learning_rate": 2.9049697573728818e-05, + "loss": 0.9444, + "step": 180 + }, + { + "epoch": 0.49267643142476697, + "grad_norm": 1.0837732553482056, + "learning_rate": 2.8971128692498872e-05, + "loss": 0.8655, + "step": 185 + }, + { + "epoch": 0.5059920106524634, + "grad_norm": 1.1335080862045288, + "learning_rate": 2.8889554534180664e-05, + "loss": 0.8091, + "step": 190 + }, + { + "epoch": 0.5193075898801598, + "grad_norm": 0.9415338635444641, + "learning_rate": 2.8804992645885415e-05, + "loss": 0.9026, + "step": 195 + }, + { + "epoch": 0.5326231691078562, + "grad_norm": 1.0674079656600952, + "learning_rate": 2.8717461217403726e-05, + "loss": 0.8389, + "step": 200 + }, + { + "epoch": 0.5459387483355526, + "grad_norm": 1.0837892293930054, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.7899, + "step": 205 + }, + { + "epoch": 0.559254327563249, + "grad_norm": 1.1236594915390015, + "learning_rate": 2.853356568882657e-05, + "loss": 0.8579, + "step": 210 + }, + { + "epoch": 0.5725699067909454, + "grad_norm": 1.1732523441314697, + "learning_rate": 2.843724114580848e-05, + "loss": 0.8274, + "step": 215 + }, + { + "epoch": 0.5858854860186418, + "grad_norm": 1.4369699954986572, + "learning_rate": 2.833802616824972e-05, + "loss": 0.7631, + "step": 220 + }, + { + "epoch": 0.5992010652463382, + "grad_norm": 1.1882026195526123, + "learning_rate": 2.8235942097911964e-05, + "loss": 0.7479, + "step": 225 + }, + { + "epoch": 0.6125166444740346, + "grad_norm": 1.0691964626312256, + "learning_rate": 2.8131010893716676e-05, + "loss": 0.7558, + "step": 230 + }, + { + "epoch": 0.625832223701731, + "grad_norm": 1.1723179817199707, + "learning_rate": 2.8023255127021593e-05, + "loss": 0.7719, + "step": 235 + }, + { + "epoch": 0.6391478029294274, + "grad_norm": 1.1993366479873657, + "learning_rate": 2.7912697976765516e-05, + "loss": 0.7081, + "step": 240 + }, + { + "epoch": 0.6524633821571239, + "grad_norm": 1.1502537727355957, + "learning_rate": 2.7799363224482334e-05, + "loss": 0.7143, + "step": 245 + }, + { + "epoch": 0.6657789613848203, + "grad_norm": 1.1936613321304321, + "learning_rate": 2.7683275249185507e-05, + "loss": 0.6583, + "step": 250 + }, + { + "epoch": 0.6790945406125166, + "grad_norm": 1.123653531074524, + "learning_rate": 2.7564459022123953e-05, + "loss": 0.6954, + "step": 255 + }, + { + "epoch": 0.6924101198402131, + "grad_norm": 1.1180944442749023, + "learning_rate": 2.744294010141061e-05, + "loss": 0.6411, + "step": 260 + }, + { + "epoch": 0.7057256990679095, + "grad_norm": 1.2447624206542969, + "learning_rate": 2.7318744626524704e-05, + "loss": 0.6312, + "step": 265 + }, + { + "epoch": 0.7190412782956058, + "grad_norm": 1.1097793579101562, + "learning_rate": 2.719189931268899e-05, + "loss": 0.6417, + "step": 270 + }, + { + "epoch": 0.7323568575233023, + "grad_norm": 1.161447286605835, + "learning_rate": 2.7062431445123127e-05, + "loss": 0.6611, + "step": 275 + }, + { + "epoch": 0.7456724367509987, + "grad_norm": 1.386555790901184, + "learning_rate": 2.6930368873174493e-05, + "loss": 0.5968, + "step": 280 + }, + { + "epoch": 0.758988015978695, + "grad_norm": 1.2418088912963867, + "learning_rate": 2.6795740004327584e-05, + "loss": 0.5808, + "step": 285 + }, + { + "epoch": 0.7723035952063915, + "grad_norm": 1.257629632949829, + "learning_rate": 2.665857379809338e-05, + "loss": 0.5793, + "step": 290 + }, + { + "epoch": 0.7856191744340879, + "grad_norm": 1.3418606519699097, + "learning_rate": 2.6518899759780017e-05, + "loss": 0.6035, + "step": 295 + }, + { + "epoch": 0.7989347536617842, + "grad_norm": 1.132562279701233, + "learning_rate": 2.637674793414596e-05, + "loss": 0.6195, + "step": 300 + }, + { + "epoch": 0.8122503328894807, + "grad_norm": 1.2257986068725586, + "learning_rate": 2.6232148898937223e-05, + "loss": 0.558, + "step": 305 + }, + { + "epoch": 0.8255659121171771, + "grad_norm": 1.187639832496643, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.5415, + "step": 310 + }, + { + "epoch": 0.8388814913448736, + "grad_norm": 1.5163284540176392, + "learning_rate": 2.5935734136139407e-05, + "loss": 0.5116, + "step": 315 + }, + { + "epoch": 0.8521970705725699, + "grad_norm": 1.2966004610061646, + "learning_rate": 2.5783982169218125e-05, + "loss": 0.5126, + "step": 320 + }, + { + "epoch": 0.8655126498002663, + "grad_norm": 1.402432918548584, + "learning_rate": 2.5629910500342424e-05, + "loss": 0.5949, + "step": 325 + }, + { + "epoch": 0.8788282290279628, + "grad_norm": 1.1825752258300781, + "learning_rate": 2.5473552271291092e-05, + "loss": 0.5517, + "step": 330 + }, + { + "epoch": 0.8921438082556591, + "grad_norm": 1.2831752300262451, + "learning_rate": 2.531494111569629e-05, + "loss": 0.5427, + "step": 335 + }, + { + "epoch": 0.9054593874833555, + "grad_norm": 1.1530109643936157, + "learning_rate": 2.5154111151808752e-05, + "loss": 0.5127, + "step": 340 + }, + { + "epoch": 0.918774966711052, + "grad_norm": 1.1827281713485718, + "learning_rate": 2.4991096975158757e-05, + "loss": 0.5341, + "step": 345 + }, + { + "epoch": 0.9320905459387483, + "grad_norm": 1.2368087768554688, + "learning_rate": 2.4825933651114375e-05, + "loss": 0.4926, + "step": 350 + }, + { + "epoch": 0.9454061251664447, + "grad_norm": 1.147762656211853, + "learning_rate": 2.4658656707338733e-05, + "loss": 0.4854, + "step": 355 + }, + { + "epoch": 0.9587217043941412, + "grad_norm": 1.3367023468017578, + "learning_rate": 2.4489302126147768e-05, + "loss": 0.5249, + "step": 360 + }, + { + "epoch": 0.9720372836218375, + "grad_norm": 1.3229764699935913, + "learning_rate": 2.431790633677019e-05, + "loss": 0.4552, + "step": 365 + }, + { + "epoch": 0.9853528628495339, + "grad_norm": 1.284393072128296, + "learning_rate": 2.414450620751136e-05, + "loss": 0.5149, + "step": 370 + }, + { + "epoch": 0.9986684420772304, + "grad_norm": 1.3867682218551636, + "learning_rate": 2.396913903782268e-05, + "loss": 0.449, + "step": 375 + }, + { + "epoch": 1.0106524633821572, + "grad_norm": 1.2377896308898926, + "learning_rate": 2.379184255027822e-05, + "loss": 0.3776, + "step": 380 + }, + { + "epoch": 1.0239680426098536, + "grad_norm": 1.1107348203659058, + "learning_rate": 2.361265488246039e-05, + "loss": 0.4101, + "step": 385 + }, + { + "epoch": 1.0372836218375499, + "grad_norm": 1.2676260471343994, + "learning_rate": 2.3431614578756304e-05, + "loss": 0.3866, + "step": 390 + }, + { + "epoch": 1.0505992010652463, + "grad_norm": 1.1657370328903198, + "learning_rate": 2.3248760582066605e-05, + "loss": 0.3831, + "step": 395 + }, + { + "epoch": 1.0639147802929427, + "grad_norm": 1.262725591659546, + "learning_rate": 2.306413222542866e-05, + "loss": 0.4127, + "step": 400 + }, + { + "epoch": 1.0772303595206392, + "grad_norm": 1.293502688407898, + "learning_rate": 2.287776922355573e-05, + "loss": 0.4012, + "step": 405 + }, + { + "epoch": 1.0905459387483356, + "grad_norm": 1.2764818668365479, + "learning_rate": 2.268971166429412e-05, + "loss": 0.356, + "step": 410 + }, + { + "epoch": 1.103861517976032, + "grad_norm": 1.2606568336486816, + "learning_rate": 2.25e-05, + "loss": 0.4087, + "step": 415 + }, + { + "epoch": 1.1171770972037283, + "grad_norm": 1.305738925933838, + "learning_rate": 2.2308675038837887e-05, + "loss": 0.3766, + "step": 420 + }, + { + "epoch": 1.1304926764314247, + "grad_norm": 1.239574670791626, + "learning_rate": 2.2115777936002533e-05, + "loss": 0.3531, + "step": 425 + }, + { + "epoch": 1.1438082556591211, + "grad_norm": 1.2828826904296875, + "learning_rate": 2.192135018486618e-05, + "loss": 0.385, + "step": 430 + }, + { + "epoch": 1.1571238348868176, + "grad_norm": 1.2147557735443115, + "learning_rate": 2.172543360805308e-05, + "loss": 0.364, + "step": 435 + }, + { + "epoch": 1.170439414114514, + "grad_norm": 1.2704769372940063, + "learning_rate": 2.152807034844322e-05, + "loss": 0.3931, + "step": 440 + }, + { + "epoch": 1.1837549933422105, + "grad_norm": 1.4151273965835571, + "learning_rate": 2.1329302860107065e-05, + "loss": 0.343, + "step": 445 + }, + { + "epoch": 1.1970705725699067, + "grad_norm": 1.11943781375885, + "learning_rate": 2.1129173899173474e-05, + "loss": 0.3146, + "step": 450 + }, + { + "epoch": 1.2103861517976031, + "grad_norm": 1.2189421653747559, + "learning_rate": 2.0927726514632557e-05, + "loss": 0.3312, + "step": 455 + }, + { + "epoch": 1.2237017310252996, + "grad_norm": 1.2393699884414673, + "learning_rate": 2.072500403907559e-05, + "loss": 0.3281, + "step": 460 + }, + { + "epoch": 1.237017310252996, + "grad_norm": 1.3109112977981567, + "learning_rate": 2.0521050079373895e-05, + "loss": 0.309, + "step": 465 + }, + { + "epoch": 1.2503328894806924, + "grad_norm": 1.1078263521194458, + "learning_rate": 2.0315908507298713e-05, + "loss": 0.2962, + "step": 470 + }, + { + "epoch": 1.2636484687083889, + "grad_norm": 1.3175866603851318, + "learning_rate": 2.0109623450084154e-05, + "loss": 0.3127, + "step": 475 + }, + { + "epoch": 1.2769640479360853, + "grad_norm": 1.284356713294983, + "learning_rate": 1.990223928093511e-05, + "loss": 0.3258, + "step": 480 + }, + { + "epoch": 1.2902796271637818, + "grad_norm": 1.4759033918380737, + "learning_rate": 1.9693800609482318e-05, + "loss": 0.3367, + "step": 485 + }, + { + "epoch": 1.303595206391478, + "grad_norm": 1.402441382408142, + "learning_rate": 1.9484352272186555e-05, + "loss": 0.3059, + "step": 490 + }, + { + "epoch": 1.3169107856191744, + "grad_norm": 1.305856704711914, + "learning_rate": 1.9273939322694035e-05, + "loss": 0.3028, + "step": 495 + }, + { + "epoch": 1.3302263648468708, + "grad_norm": 1.1870449781417847, + "learning_rate": 1.906260702214508e-05, + "loss": 0.341, + "step": 500 + }, + { + "epoch": 1.3435419440745673, + "grad_norm": 1.3310000896453857, + "learning_rate": 1.8850400829438157e-05, + "loss": 0.2872, + "step": 505 + }, + { + "epoch": 1.3568575233022637, + "grad_norm": 1.1719340085983276, + "learning_rate": 1.8637366391451414e-05, + "loss": 0.309, + "step": 510 + }, + { + "epoch": 1.37017310252996, + "grad_norm": 1.3877124786376953, + "learning_rate": 1.842354953322373e-05, + "loss": 0.2924, + "step": 515 + }, + { + "epoch": 1.3834886817576564, + "grad_norm": 1.3386942148208618, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.3177, + "step": 520 + }, + { + "epoch": 1.3968042609853528, + "grad_norm": 1.348686933517456, + "learning_rate": 1.7993752687825003e-05, + "loss": 0.2881, + "step": 525 + }, + { + "epoch": 1.4101198402130493, + "grad_norm": 1.3905302286148071, + "learning_rate": 1.777786515264123e-05, + "loss": 0.259, + "step": 530 + }, + { + "epoch": 1.4234354194407457, + "grad_norm": 1.2143332958221436, + "learning_rate": 1.7561380081304063e-05, + "loss": 0.2521, + "step": 535 + }, + { + "epoch": 1.4367509986684421, + "grad_norm": 1.3101756572723389, + "learning_rate": 1.7344344041105177e-05, + "loss": 0.2499, + "step": 540 + }, + { + "epoch": 1.4500665778961386, + "grad_norm": 1.3429689407348633, + "learning_rate": 1.7126803717853086e-05, + "loss": 0.2492, + "step": 545 + }, + { + "epoch": 1.463382157123835, + "grad_norm": 1.3282181024551392, + "learning_rate": 1.6908805905830752e-05, + "loss": 0.2749, + "step": 550 + }, + { + "epoch": 1.4766977363515312, + "grad_norm": 1.3172345161437988, + "learning_rate": 1.6690397497729818e-05, + "loss": 0.2677, + "step": 555 + }, + { + "epoch": 1.4900133155792277, + "grad_norm": 1.2431777715682983, + "learning_rate": 1.647162547456372e-05, + "loss": 0.2431, + "step": 560 + }, + { + "epoch": 1.503328894806924, + "grad_norm": 1.1845526695251465, + "learning_rate": 1.6252536895561754e-05, + "loss": 0.2669, + "step": 565 + }, + { + "epoch": 1.5166444740346205, + "grad_norm": 1.14946448802948, + "learning_rate": 1.6033178888046368e-05, + "loss": 0.2576, + "step": 570 + }, + { + "epoch": 1.5299600532623168, + "grad_norm": 1.1496193408966064, + "learning_rate": 1.5813598637295767e-05, + "loss": 0.2162, + "step": 575 + }, + { + "epoch": 1.5432756324900132, + "grad_norm": 1.5620521306991577, + "learning_rate": 1.5593843376394043e-05, + "loss": 0.252, + "step": 580 + }, + { + "epoch": 1.5565912117177096, + "grad_norm": 1.2729089260101318, + "learning_rate": 1.5373960376071095e-05, + "loss": 0.224, + "step": 585 + }, + { + "epoch": 1.569906790945406, + "grad_norm": 1.412192463874817, + "learning_rate": 1.515399693453435e-05, + "loss": 0.2301, + "step": 590 + }, + { + "epoch": 1.5832223701731025, + "grad_norm": 1.3245265483856201, + "learning_rate": 1.493400036729465e-05, + "loss": 0.239, + "step": 595 + }, + { + "epoch": 1.596537949400799, + "grad_norm": 1.4611005783081055, + "learning_rate": 1.4714017996988384e-05, + "loss": 0.2745, + "step": 600 + }, + { + "epoch": 1.6098535286284954, + "grad_norm": 1.1875600814819336, + "learning_rate": 1.4494097143198083e-05, + "loss": 0.1918, + "step": 605 + }, + { + "epoch": 1.6231691078561918, + "grad_norm": 1.2184709310531616, + "learning_rate": 1.4274285112273701e-05, + "loss": 0.2006, + "step": 610 + }, + { + "epoch": 1.6364846870838883, + "grad_norm": 1.1442549228668213, + "learning_rate": 1.4054629187156702e-05, + "loss": 0.2014, + "step": 615 + }, + { + "epoch": 1.6498002663115847, + "grad_norm": 1.167523741722107, + "learning_rate": 1.3835176617209241e-05, + "loss": 0.2163, + "step": 620 + }, + { + "epoch": 1.663115845539281, + "grad_norm": 1.115420937538147, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.2044, + "step": 625 + }, + { + "epoch": 1.6764314247669774, + "grad_norm": 1.3794896602630615, + "learning_rate": 1.3397070311402377e-05, + "loss": 0.2153, + "step": 630 + }, + { + "epoch": 1.6897470039946738, + "grad_norm": 1.2318223714828491, + "learning_rate": 1.3178510814947112e-05, + "loss": 0.1936, + "step": 635 + }, + { + "epoch": 1.70306258322237, + "grad_norm": 1.1374400854110718, + "learning_rate": 1.296034313219816e-05, + "loss": 0.2152, + "step": 640 + }, + { + "epoch": 1.7163781624500665, + "grad_norm": 1.350420594215393, + "learning_rate": 1.2742614192387417e-05, + "loss": 0.1992, + "step": 645 + }, + { + "epoch": 1.729693741677763, + "grad_norm": 1.3927255868911743, + "learning_rate": 1.2525370830370447e-05, + "loss": 0.1791, + "step": 650 + }, + { + "epoch": 1.7430093209054593, + "grad_norm": 1.1840988397598267, + "learning_rate": 1.2308659776551985e-05, + "loss": 0.1926, + "step": 655 + }, + { + "epoch": 1.7563249001331558, + "grad_norm": 1.0770949125289917, + "learning_rate": 1.209252764683395e-05, + "loss": 0.1826, + "step": 660 + }, + { + "epoch": 1.7696404793608522, + "grad_norm": 1.3267724514007568, + "learning_rate": 1.1877020932588067e-05, + "loss": 0.2054, + "step": 665 + }, + { + "epoch": 1.7829560585885487, + "grad_norm": 1.1034506559371948, + "learning_rate": 1.1662185990655285e-05, + "loss": 0.18, + "step": 670 + }, + { + "epoch": 1.796271637816245, + "grad_norm": 1.200170874595642, + "learning_rate": 1.1448069033374135e-05, + "loss": 0.1743, + "step": 675 + }, + { + "epoch": 1.8095872170439415, + "grad_norm": 1.2136085033416748, + "learning_rate": 1.1234716118640149e-05, + "loss": 0.1952, + "step": 680 + }, + { + "epoch": 1.822902796271638, + "grad_norm": 1.1143393516540527, + "learning_rate": 1.1022173139998556e-05, + "loss": 0.1881, + "step": 685 + }, + { + "epoch": 1.8362183754993342, + "grad_norm": 1.3163598775863647, + "learning_rate": 1.0810485816772251e-05, + "loss": 0.1776, + "step": 690 + }, + { + "epoch": 1.8495339547270306, + "grad_norm": 1.1336655616760254, + "learning_rate": 1.0599699684227313e-05, + "loss": 0.1699, + "step": 695 + }, + { + "epoch": 1.862849533954727, + "grad_norm": 1.249768614768982, + "learning_rate": 1.0389860083778056e-05, + "loss": 0.1622, + "step": 700 + }, + { + "epoch": 1.8761651131824233, + "grad_norm": 1.2032581567764282, + "learning_rate": 1.0181012153233851e-05, + "loss": 0.1774, + "step": 705 + }, + { + "epoch": 1.8894806924101197, + "grad_norm": 1.115145206451416, + "learning_rate": 9.973200817089655e-06, + "loss": 0.1616, + "step": 710 + }, + { + "epoch": 1.9027962716378162, + "grad_norm": 1.2484344244003296, + "learning_rate": 9.7664707768625e-06, + "loss": 0.1441, + "step": 715 + }, + { + "epoch": 1.9161118508655126, + "grad_norm": 1.3662018775939941, + "learning_rate": 9.560866501475913e-06, + "loss": 0.166, + "step": 720 + }, + { + "epoch": 1.929427430093209, + "grad_norm": 1.1251648664474487, + "learning_rate": 9.35643221769436e-06, + "loss": 0.1859, + "step": 725 + }, + { + "epoch": 1.9427430093209055, + "grad_norm": 1.2358664274215698, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1877, + "step": 730 + }, + { + "epoch": 1.956058588548602, + "grad_norm": 1.0403860807418823, + "learning_rate": 8.951249264182403e-06, + "loss": 0.1622, + "step": 735 + }, + { + "epoch": 1.9693741677762984, + "grad_norm": 1.0493032932281494, + "learning_rate": 8.750587751837313e-06, + "loss": 0.1733, + "step": 740 + }, + { + "epoch": 1.9826897470039948, + "grad_norm": 1.0602375268936157, + "learning_rate": 8.551270527119784e-06, + "loss": 0.155, + "step": 745 + }, + { + "epoch": 1.996005326231691, + "grad_norm": 1.30148446559906, + "learning_rate": 8.35334046441041e-06, + "loss": 0.1656, + "step": 750 + }, + { + "epoch": 2.007989347536618, + "grad_norm": 1.0473822355270386, + "learning_rate": 8.156840139702554e-06, + "loss": 0.146, + "step": 755 + }, + { + "epoch": 2.0213049267643144, + "grad_norm": 1.1577352285385132, + "learning_rate": 7.961811821444008e-06, + "loss": 0.1122, + "step": 760 + }, + { + "epoch": 2.034620505992011, + "grad_norm": 1.155028223991394, + "learning_rate": 7.768297461444766e-06, + "loss": 0.1263, + "step": 765 + }, + { + "epoch": 2.0479360852197073, + "grad_norm": 1.1804635524749756, + "learning_rate": 7.576338685852955e-06, + "loss": 0.1125, + "step": 770 + }, + { + "epoch": 2.0612516644474033, + "grad_norm": 1.1472411155700684, + "learning_rate": 7.385976786200765e-06, + "loss": 0.114, + "step": 775 + }, + { + "epoch": 2.0745672436750997, + "grad_norm": 1.0748953819274902, + "learning_rate": 7.197252710522395e-06, + "loss": 0.13, + "step": 780 + }, + { + "epoch": 2.087882822902796, + "grad_norm": 0.8666295409202576, + "learning_rate": 7.010207054545873e-06, + "loss": 0.1069, + "step": 785 + }, + { + "epoch": 2.1011984021304926, + "grad_norm": 1.1171802282333374, + "learning_rate": 6.8248800529606604e-06, + "loss": 0.107, + "step": 790 + }, + { + "epoch": 2.114513981358189, + "grad_norm": 0.8693982362747192, + "learning_rate": 6.641311570762918e-06, + "loss": 0.1165, + "step": 795 + }, + { + "epoch": 2.1278295605858855, + "grad_norm": 1.027008295059204, + "learning_rate": 6.4595410946803e-06, + "loss": 0.111, + "step": 800 + }, + { + "epoch": 2.141145139813582, + "grad_norm": 0.9499240517616272, + "learning_rate": 6.2796077246781046e-06, + "loss": 0.1068, + "step": 805 + }, + { + "epoch": 2.1544607190412783, + "grad_norm": 1.0865854024887085, + "learning_rate": 6.1015501655486365e-06, + "loss": 0.1209, + "step": 810 + }, + { + "epoch": 2.1677762982689748, + "grad_norm": 1.2311553955078125, + "learning_rate": 5.925406718585552e-06, + "loss": 0.1144, + "step": 815 + }, + { + "epoch": 2.181091877496671, + "grad_norm": 1.1496752500534058, + "learning_rate": 5.751215273345036e-06, + "loss": 0.1126, + "step": 820 + }, + { + "epoch": 2.1944074567243677, + "grad_norm": 0.9541864395141602, + "learning_rate": 5.5790132994954935e-06, + "loss": 0.0967, + "step": 825 + }, + { + "epoch": 2.207723035952064, + "grad_norm": 0.9668985605239868, + "learning_rate": 5.408837838757588e-06, + "loss": 0.1005, + "step": 830 + }, + { + "epoch": 2.2210386151797605, + "grad_norm": 0.7837927341461182, + "learning_rate": 5.240725496936373e-06, + "loss": 0.0998, + "step": 835 + }, + { + "epoch": 2.2343541944074565, + "grad_norm": 1.0072641372680664, + "learning_rate": 5.0747124360471125e-06, + "loss": 0.0839, + "step": 840 + }, + { + "epoch": 2.247669773635153, + "grad_norm": 1.2144713401794434, + "learning_rate": 4.910834366536631e-06, + "loss": 0.1084, + "step": 845 + }, + { + "epoch": 2.2609853528628494, + "grad_norm": 0.9212657809257507, + "learning_rate": 4.74912653960177e-06, + "loss": 0.0865, + "step": 850 + }, + { + "epoch": 2.274300932090546, + "grad_norm": 1.0378059148788452, + "learning_rate": 4.589623739606625e-06, + "loss": 0.1019, + "step": 855 + }, + { + "epoch": 2.2876165113182423, + "grad_norm": 0.7480744123458862, + "learning_rate": 4.4323602766002165e-06, + "loss": 0.1009, + "step": 860 + }, + { + "epoch": 2.3009320905459387, + "grad_norm": 0.9392785429954529, + "learning_rate": 4.277369978936188e-06, + "loss": 0.0938, + "step": 865 + }, + { + "epoch": 2.314247669773635, + "grad_norm": 1.1454919576644897, + "learning_rate": 4.1246861859961114e-06, + "loss": 0.11, + "step": 870 + }, + { + "epoch": 2.3275632490013316, + "grad_norm": 1.3475381135940552, + "learning_rate": 3.974341741017978e-06, + "loss": 0.1404, + "step": 875 + }, + { + "epoch": 2.340878828229028, + "grad_norm": 0.842522919178009, + "learning_rate": 3.826368984031414e-06, + "loss": 0.0963, + "step": 880 + }, + { + "epoch": 2.3541944074567245, + "grad_norm": 0.951410710811615, + "learning_rate": 3.6807997449011426e-06, + "loss": 0.0936, + "step": 885 + }, + { + "epoch": 2.367509986684421, + "grad_norm": 0.9912516474723816, + "learning_rate": 3.5376653364801703e-06, + "loss": 0.0978, + "step": 890 + }, + { + "epoch": 2.3808255659121174, + "grad_norm": 0.9771565794944763, + "learning_rate": 3.3969965478742038e-06, + "loss": 0.096, + "step": 895 + }, + { + "epoch": 2.3941411451398134, + "grad_norm": 0.8241867423057556, + "learning_rate": 3.258823637818722e-06, + "loss": 0.1016, + "step": 900 + }, + { + "epoch": 2.40745672436751, + "grad_norm": 0.9509169459342957, + "learning_rate": 3.123176328170131e-06, + "loss": 0.0902, + "step": 905 + }, + { + "epoch": 2.4207723035952062, + "grad_norm": 1.0115470886230469, + "learning_rate": 2.990083797512401e-06, + "loss": 0.096, + "step": 910 + }, + { + "epoch": 2.4340878828229027, + "grad_norm": 0.8552641868591309, + "learning_rate": 2.8595746748805805e-06, + "loss": 0.0867, + "step": 915 + }, + { + "epoch": 2.447403462050599, + "grad_norm": 1.0813640356063843, + "learning_rate": 2.7316770336025166e-06, + "loss": 0.0986, + "step": 920 + }, + { + "epoch": 2.4607190412782955, + "grad_norm": 0.8260074853897095, + "learning_rate": 2.60641838526008e-06, + "loss": 0.0848, + "step": 925 + }, + { + "epoch": 2.474034620505992, + "grad_norm": 0.6660257577896118, + "learning_rate": 2.483825673771279e-06, + "loss": 0.0907, + "step": 930 + }, + { + "epoch": 2.4873501997336884, + "grad_norm": 0.7299282550811768, + "learning_rate": 2.363925269594449e-06, + "loss": 0.0803, + "step": 935 + }, + { + "epoch": 2.500665778961385, + "grad_norm": 0.7600756883621216, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.081, + "step": 940 + }, + { + "epoch": 2.5139813581890813, + "grad_norm": 0.7322470545768738, + "learning_rate": 2.1323039638015024e-06, + "loss": 0.0987, + "step": 945 + }, + { + "epoch": 2.5272969374167777, + "grad_norm": 0.8522736430168152, + "learning_rate": 2.020632885375684e-06, + "loss": 0.0916, + "step": 950 + }, + { + "epoch": 2.540612516644474, + "grad_norm": 0.8361161947250366, + "learning_rate": 1.9117537499251416e-06, + "loss": 0.0914, + "step": 955 + }, + { + "epoch": 2.5539280958721706, + "grad_norm": 0.9073055386543274, + "learning_rate": 1.8056899780323016e-06, + "loss": 0.0917, + "step": 960 + }, + { + "epoch": 2.567243675099867, + "grad_norm": 0.9437789916992188, + "learning_rate": 1.7024643846772981e-06, + "loss": 0.0829, + "step": 965 + }, + { + "epoch": 2.5805592543275635, + "grad_norm": 0.6315572261810303, + "learning_rate": 1.6020991743303264e-06, + "loss": 0.0761, + "step": 970 + }, + { + "epoch": 2.5938748335552595, + "grad_norm": 0.8422160148620605, + "learning_rate": 1.5046159361753226e-06, + "loss": 0.086, + "step": 975 + }, + { + "epoch": 2.607190412782956, + "grad_norm": 0.6973531246185303, + "learning_rate": 1.4100356394659863e-06, + "loss": 0.0906, + "step": 980 + }, + { + "epoch": 2.6205059920106524, + "grad_norm": 0.7962859869003296, + "learning_rate": 1.318378629015184e-06, + "loss": 0.0934, + "step": 985 + }, + { + "epoch": 2.633821571238349, + "grad_norm": 0.729464590549469, + "learning_rate": 1.229664620818633e-06, + "loss": 0.0899, + "step": 990 + }, + { + "epoch": 2.6471371504660453, + "grad_norm": 0.8225182294845581, + "learning_rate": 1.1439126978138769e-06, + "loss": 0.0859, + "step": 995 + }, + { + "epoch": 2.6604527296937417, + "grad_norm": 0.7297401428222656, + "learning_rate": 1.0611413057754221e-06, + "loss": 0.0816, + "step": 1000 + }, + { + "epoch": 2.673768308921438, + "grad_norm": 1.0208203792572021, + "learning_rate": 9.813682493469396e-07, + "loss": 0.0902, + "step": 1005 + }, + { + "epoch": 2.6870838881491346, + "grad_norm": 0.6697482466697693, + "learning_rate": 9.046106882113753e-07, + "loss": 0.0892, + "step": 1010 + }, + { + "epoch": 2.700399467376831, + "grad_norm": 0.6955990195274353, + "learning_rate": 8.308851333997918e-07, + "loss": 0.0931, + "step": 1015 + }, + { + "epoch": 2.7137150466045274, + "grad_norm": 0.7623629570007324, + "learning_rate": 7.602074437397455e-07, + "loss": 0.077, + "step": 1020 + }, + { + "epoch": 2.7270306258322234, + "grad_norm": 0.6877727508544922, + "learning_rate": 6.925928224439532e-07, + "loss": 0.0799, + "step": 1025 + }, + { + "epoch": 2.74034620505992, + "grad_norm": 0.7570786476135254, + "learning_rate": 6.280558138399805e-07, + "loss": 0.0935, + "step": 1030 + }, + { + "epoch": 2.7536617842876163, + "grad_norm": 0.6350867748260498, + "learning_rate": 5.666103002416762e-07, + "loss": 0.0724, + "step": 1035 + }, + { + "epoch": 2.7669773635153128, + "grad_norm": 0.651555061340332, + "learning_rate": 5.082694989629916e-07, + "loss": 0.0731, + "step": 1040 + }, + { + "epoch": 2.780292942743009, + "grad_norm": 0.7878639101982117, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.0841, + "step": 1045 + }, + { + "epoch": 2.7936085219707056, + "grad_norm": 0.7648158073425293, + "learning_rate": 4.0095156070571513e-07, + "loss": 0.0753, + "step": 1050 + }, + { + "epoch": 2.806924101198402, + "grad_norm": 0.6726447343826294, + "learning_rate": 3.5199750848627753e-07, + "loss": 0.0773, + "step": 1055 + }, + { + "epoch": 2.8202396804260985, + "grad_norm": 0.8263749480247498, + "learning_rate": 3.0619433313909706e-07, + "loss": 0.0987, + "step": 1060 + }, + { + "epoch": 2.833555259653795, + "grad_norm": 0.7295187711715698, + "learning_rate": 2.635518872134185e-07, + "loss": 0.0903, + "step": 1065 + }, + { + "epoch": 2.8468708388814914, + "grad_norm": 0.7376275658607483, + "learning_rate": 2.2407934336583446e-07, + "loss": 0.0768, + "step": 1070 + }, + { + "epoch": 2.860186418109188, + "grad_norm": 0.7473690509796143, + "learning_rate": 1.8778519238719204e-07, + "loss": 0.081, + "step": 1075 + }, + { + "epoch": 2.8735019973368843, + "grad_norm": 0.6857921481132507, + "learning_rate": 1.5467724137617046e-07, + "loss": 0.078, + "step": 1080 + }, + { + "epoch": 2.8868175765645807, + "grad_norm": 0.8117772340774536, + "learning_rate": 1.2476261205992934e-07, + "loss": 0.0866, + "step": 1085 + }, + { + "epoch": 2.900133155792277, + "grad_norm": 0.6111254096031189, + "learning_rate": 9.804773926217092e-08, + "loss": 0.068, + "step": 1090 + }, + { + "epoch": 2.9134487350199736, + "grad_norm": 0.7473412752151489, + "learning_rate": 7.453836951897885e-08, + "loss": 0.0727, + "step": 1095 + }, + { + "epoch": 2.92676431424767, + "grad_norm": 0.5296754240989685, + "learning_rate": 5.4239559842695354e-08, + "loss": 0.0824, + "step": 1100 + }, + { + "epoch": 2.940079893475366, + "grad_norm": 0.6617763638496399, + "learning_rate": 3.715567663412966e-08, + "loss": 0.0849, + "step": 1105 + }, + { + "epoch": 2.9533954727030625, + "grad_norm": 0.7495169639587402, + "learning_rate": 2.3290394743317732e-08, + "loss": 0.0704, + "step": 1110 + }, + { + "epoch": 2.966711051930759, + "grad_norm": 0.5953858494758606, + "learning_rate": 1.2646696679042835e-08, + "loss": 0.0789, + "step": 1115 + }, + { + "epoch": 2.9800266311584553, + "grad_norm": 0.6375231742858887, + "learning_rate": 5.2268719672671215e-09, + "loss": 0.0788, + "step": 1120 + }, + { + "epoch": 2.993342210386152, + "grad_norm": 0.732907772064209, + "learning_rate": 1.0325166586572233e-09, + "loss": 0.0761, + "step": 1125 + }, + { + "epoch": 3.0, + "step": 1128, + "total_flos": 1.4414949418081976e+18, + "train_loss": 0.43230534175820384, + "train_runtime": 1139.8627, + "train_samples_per_second": 31.62, + "train_steps_per_second": 0.99 + } + ], + "logging_steps": 5, + "max_steps": 1128, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4414949418081976e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..985f732f4ef043c6a466b7412c2a561b9673936a --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/4_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4871ec3c6154b846aaad60029d43cdcc968db4143e7eb2fccbbcacfb93df121 +size 8273 diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/README.md b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..13a1341cf2980f93e3bc8d86ac006813ae0d9e29 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/test/processed/knowledge_50 +model-index: +- name: 5_128_e3_3e-5 + results: [] +--- + + + +# 5_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/test/processed/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/adapter_config.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..de8065d74a5d9a19eeca190e34655f0c3614f5a2 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "k_proj", + "o_proj", + "gate_proj", + "up_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e66ed8b6339400ba14ac5d6ba0f931eba14152bf --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b56cd4322c48428b0e9220f08abed806c706d43895de2f61ebf21ee65f5809e +size 671150064 diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/all_results.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f7d76d61634f28ff22e12059ab7d7eb0e9cc88f4 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.225514319484027e+18, + "train_loss": 0.4207725525358037, + "train_runtime": 994.6554, + "train_samples": 10463, + "train_samples_per_second": 31.558, + "train_steps_per_second": 0.986 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/chat_template.jinja b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/config.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/special_tokens_map.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/tokenizer_config.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/train_results.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f7d76d61634f28ff22e12059ab7d7eb0e9cc88f4 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.225514319484027e+18, + "train_loss": 0.4207725525358037, + "train_runtime": 994.6554, + "train_samples": 10463, + "train_samples_per_second": 31.558, + "train_steps_per_second": 0.986 +} \ No newline at end of file diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/trainer_state.json b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f39655abe8038f3b424ca026f80b14315b452b69 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1415 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 981, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01529051987767584, + "grad_norm": 0.685274064540863, + "learning_rate": 2.4000000000000003e-06, + "loss": 1.6018, + "step": 5 + }, + { + "epoch": 0.03058103975535168, + "grad_norm": 0.6228384375572205, + "learning_rate": 5.4e-06, + "loss": 1.5909, + "step": 10 + }, + { + "epoch": 0.045871559633027525, + "grad_norm": 0.6067872643470764, + "learning_rate": 8.400000000000001e-06, + "loss": 1.6205, + "step": 15 + }, + { + "epoch": 0.06116207951070336, + "grad_norm": 0.5261496901512146, + "learning_rate": 1.1400000000000001e-05, + "loss": 1.637, + "step": 20 + }, + { + "epoch": 0.0764525993883792, + "grad_norm": 0.5323638916015625, + "learning_rate": 1.44e-05, + "loss": 1.5863, + "step": 25 + }, + { + "epoch": 0.09174311926605505, + "grad_norm": 0.4580898582935333, + "learning_rate": 1.74e-05, + "loss": 1.5922, + "step": 30 + }, + { + "epoch": 0.10703363914373089, + "grad_norm": 0.5250867605209351, + "learning_rate": 2.04e-05, + "loss": 1.5211, + "step": 35 + }, + { + "epoch": 0.12232415902140673, + "grad_norm": 0.5394893288612366, + "learning_rate": 2.3400000000000003e-05, + "loss": 1.4926, + "step": 40 + }, + { + "epoch": 0.13761467889908258, + "grad_norm": 0.4827323853969574, + "learning_rate": 2.64e-05, + "loss": 1.4496, + "step": 45 + }, + { + "epoch": 0.1529051987767584, + "grad_norm": 0.5449787974357605, + "learning_rate": 2.94e-05, + "loss": 1.3795, + "step": 50 + }, + { + "epoch": 0.16819571865443425, + "grad_norm": 0.519065260887146, + "learning_rate": 2.999863360886452e-05, + "loss": 1.377, + "step": 55 + }, + { + "epoch": 0.1834862385321101, + "grad_norm": 0.537458062171936, + "learning_rate": 2.99930830715182e-05, + "loss": 1.3432, + "step": 60 + }, + { + "epoch": 0.19877675840978593, + "grad_norm": 0.6706751585006714, + "learning_rate": 2.9983264567328756e-05, + "loss": 1.4104, + "step": 65 + }, + { + "epoch": 0.21406727828746178, + "grad_norm": 0.7224559783935547, + "learning_rate": 2.9969180891255046e-05, + "loss": 1.3127, + "step": 70 + }, + { + "epoch": 0.22935779816513763, + "grad_norm": 0.6015632152557373, + "learning_rate": 2.9950836052389943e-05, + "loss": 1.3341, + "step": 75 + }, + { + "epoch": 0.24464831804281345, + "grad_norm": 0.6353119015693665, + "learning_rate": 2.9928235272819095e-05, + "loss": 1.196, + "step": 80 + }, + { + "epoch": 0.2599388379204893, + "grad_norm": 0.6725102663040161, + "learning_rate": 2.9901384986134417e-05, + "loss": 1.267, + "step": 85 + }, + { + "epoch": 0.27522935779816515, + "grad_norm": 0.7816044688224792, + "learning_rate": 2.9870292835602647e-05, + "loss": 1.1518, + "step": 90 + }, + { + "epoch": 0.290519877675841, + "grad_norm": 0.7150799036026001, + "learning_rate": 2.9834967671989633e-05, + "loss": 1.1748, + "step": 95 + }, + { + "epoch": 0.3058103975535168, + "grad_norm": 0.7371142506599426, + "learning_rate": 2.9795419551040836e-05, + "loss": 1.0907, + "step": 100 + }, + { + "epoch": 0.3211009174311927, + "grad_norm": 0.7983435988426208, + "learning_rate": 2.9751659730618836e-05, + "loss": 1.102, + "step": 105 + }, + { + "epoch": 0.3363914373088685, + "grad_norm": 0.7851730585098267, + "learning_rate": 2.9703700667498654e-05, + "loss": 1.1271, + "step": 110 + }, + { + "epoch": 0.3516819571865443, + "grad_norm": 0.7610542178153992, + "learning_rate": 2.9651556013821764e-05, + "loss": 0.9916, + "step": 115 + }, + { + "epoch": 0.3669724770642202, + "grad_norm": 0.7749484777450562, + "learning_rate": 2.959524061320984e-05, + "loss": 1.0125, + "step": 120 + }, + { + "epoch": 0.382262996941896, + "grad_norm": 0.9490089416503906, + "learning_rate": 2.9534770496539352e-05, + "loss": 0.9724, + "step": 125 + }, + { + "epoch": 0.39755351681957185, + "grad_norm": 0.8583216071128845, + "learning_rate": 2.9470162877378167e-05, + "loss": 1.0127, + "step": 130 + }, + { + "epoch": 0.41284403669724773, + "grad_norm": 0.843806803226471, + "learning_rate": 2.940143614708549e-05, + "loss": 0.943, + "step": 135 + }, + { + "epoch": 0.42813455657492355, + "grad_norm": 0.9052720665931702, + "learning_rate": 2.9328609869576543e-05, + "loss": 0.9512, + "step": 140 + }, + { + "epoch": 0.4434250764525994, + "grad_norm": 0.9606586694717407, + "learning_rate": 2.9251704775753455e-05, + "loss": 0.9143, + "step": 145 + }, + { + "epoch": 0.45871559633027525, + "grad_norm": 1.011435866355896, + "learning_rate": 2.9170742757603937e-05, + "loss": 0.8852, + "step": 150 + }, + { + "epoch": 0.4740061162079511, + "grad_norm": 1.0943304300308228, + "learning_rate": 2.908574686196949e-05, + "loss": 0.8317, + "step": 155 + }, + { + "epoch": 0.4892966360856269, + "grad_norm": 1.0801130533218384, + "learning_rate": 2.8996741283984803e-05, + "loss": 0.9071, + "step": 160 + }, + { + "epoch": 0.5045871559633027, + "grad_norm": 1.0770649909973145, + "learning_rate": 2.8903751360190327e-05, + "loss": 0.8339, + "step": 165 + }, + { + "epoch": 0.5198776758409785, + "grad_norm": 1.0063605308532715, + "learning_rate": 2.8806803561319903e-05, + "loss": 0.8101, + "step": 170 + }, + { + "epoch": 0.5351681957186545, + "grad_norm": 1.067468523979187, + "learning_rate": 2.8705925484765556e-05, + "loss": 0.8775, + "step": 175 + }, + { + "epoch": 0.5504587155963303, + "grad_norm": 1.1247386932373047, + "learning_rate": 2.8601145846721527e-05, + "loss": 0.7635, + "step": 180 + }, + { + "epoch": 0.5657492354740061, + "grad_norm": 1.2035037279129028, + "learning_rate": 2.84924944740099e-05, + "loss": 0.8206, + "step": 185 + }, + { + "epoch": 0.581039755351682, + "grad_norm": 1.034868597984314, + "learning_rate": 2.8380002295589963e-05, + "loss": 0.8168, + "step": 190 + }, + { + "epoch": 0.5963302752293578, + "grad_norm": 1.0151512622833252, + "learning_rate": 2.8263701333753964e-05, + "loss": 0.7442, + "step": 195 + }, + { + "epoch": 0.6116207951070336, + "grad_norm": 1.1637232303619385, + "learning_rate": 2.8143624695011514e-05, + "loss": 0.732, + "step": 200 + }, + { + "epoch": 0.6269113149847095, + "grad_norm": 1.1152065992355347, + "learning_rate": 2.801980656066545e-05, + "loss": 0.6791, + "step": 205 + }, + { + "epoch": 0.6422018348623854, + "grad_norm": 1.4207673072814941, + "learning_rate": 2.789228217708166e-05, + "loss": 0.665, + "step": 210 + }, + { + "epoch": 0.6574923547400612, + "grad_norm": 1.2393033504486084, + "learning_rate": 2.776108784565583e-05, + "loss": 0.6749, + "step": 215 + }, + { + "epoch": 0.672782874617737, + "grad_norm": 1.5352226495742798, + "learning_rate": 2.762626091247973e-05, + "loss": 0.7174, + "step": 220 + }, + { + "epoch": 0.6880733944954128, + "grad_norm": 1.0892246961593628, + "learning_rate": 2.748783975771024e-05, + "loss": 0.6684, + "step": 225 + }, + { + "epoch": 0.7033639143730887, + "grad_norm": 1.2517621517181396, + "learning_rate": 2.734586378464395e-05, + "loss": 0.6351, + "step": 230 + }, + { + "epoch": 0.7186544342507645, + "grad_norm": 2.9538815021514893, + "learning_rate": 2.7200373408500498e-05, + "loss": 0.6388, + "step": 235 + }, + { + "epoch": 0.7339449541284404, + "grad_norm": 1.219746708869934, + "learning_rate": 2.705141004491792e-05, + "loss": 0.7052, + "step": 240 + }, + { + "epoch": 0.7492354740061162, + "grad_norm": 1.501096487045288, + "learning_rate": 2.689901609816313e-05, + "loss": 0.6177, + "step": 245 + }, + { + "epoch": 0.764525993883792, + "grad_norm": 1.190100908279419, + "learning_rate": 2.6743234949061072e-05, + "loss": 0.5732, + "step": 250 + }, + { + "epoch": 0.7798165137614679, + "grad_norm": 1.1484848260879517, + "learning_rate": 2.658411094264577e-05, + "loss": 0.5817, + "step": 255 + }, + { + "epoch": 0.7951070336391437, + "grad_norm": 1.2813721895217896, + "learning_rate": 2.6421689375537015e-05, + "loss": 0.5518, + "step": 260 + }, + { + "epoch": 0.8103975535168195, + "grad_norm": 1.1426920890808105, + "learning_rate": 2.6256016483046106e-05, + "loss": 0.5447, + "step": 265 + }, + { + "epoch": 0.8256880733944955, + "grad_norm": 1.2212082147598267, + "learning_rate": 2.6087139426014373e-05, + "loss": 0.5943, + "step": 270 + }, + { + "epoch": 0.8409785932721713, + "grad_norm": 1.3299758434295654, + "learning_rate": 2.5915106277388293e-05, + "loss": 0.5591, + "step": 275 + }, + { + "epoch": 0.8562691131498471, + "grad_norm": 1.1552865505218506, + "learning_rate": 2.573996600853492e-05, + "loss": 0.5007, + "step": 280 + }, + { + "epoch": 0.8715596330275229, + "grad_norm": 1.4573205709457397, + "learning_rate": 2.5561768475301556e-05, + "loss": 0.5178, + "step": 285 + }, + { + "epoch": 0.8868501529051988, + "grad_norm": 1.1958438158035278, + "learning_rate": 2.53805644038237e-05, + "loss": 0.5035, + "step": 290 + }, + { + "epoch": 0.9021406727828746, + "grad_norm": 1.216157078742981, + "learning_rate": 2.519640537608521e-05, + "loss": 0.5008, + "step": 295 + }, + { + "epoch": 0.9174311926605505, + "grad_norm": 1.2936855554580688, + "learning_rate": 2.5009343815234845e-05, + "loss": 0.5089, + "step": 300 + }, + { + "epoch": 0.9327217125382263, + "grad_norm": 1.2408041954040527, + "learning_rate": 2.4819432970663375e-05, + "loss": 0.4623, + "step": 305 + }, + { + "epoch": 0.9480122324159022, + "grad_norm": 1.3493720293045044, + "learning_rate": 2.4626726902845477e-05, + "loss": 0.498, + "step": 310 + }, + { + "epoch": 0.963302752293578, + "grad_norm": 1.2752997875213623, + "learning_rate": 2.4431280467950758e-05, + "loss": 0.486, + "step": 315 + }, + { + "epoch": 0.9785932721712538, + "grad_norm": 1.1208157539367676, + "learning_rate": 2.423314930222828e-05, + "loss": 0.4965, + "step": 320 + }, + { + "epoch": 0.9938837920489296, + "grad_norm": 1.2809463739395142, + "learning_rate": 2.4032389806168958e-05, + "loss": 0.4854, + "step": 325 + }, + { + "epoch": 1.0091743119266054, + "grad_norm": 1.158416748046875, + "learning_rate": 2.3829059128450537e-05, + "loss": 0.4206, + "step": 330 + }, + { + "epoch": 1.0244648318042813, + "grad_norm": 1.2214034795761108, + "learning_rate": 2.362321514966945e-05, + "loss": 0.4274, + "step": 335 + }, + { + "epoch": 1.039755351681957, + "grad_norm": 1.2345026731491089, + "learning_rate": 2.3414916465864434e-05, + "loss": 0.3908, + "step": 340 + }, + { + "epoch": 1.0550458715596331, + "grad_norm": 1.4175245761871338, + "learning_rate": 2.320422237183641e-05, + "loss": 0.3812, + "step": 345 + }, + { + "epoch": 1.070336391437309, + "grad_norm": 1.3918770551681519, + "learning_rate": 2.299119284426948e-05, + "loss": 0.3673, + "step": 350 + }, + { + "epoch": 1.0856269113149848, + "grad_norm": 1.3940770626068115, + "learning_rate": 2.277588852465788e-05, + "loss": 0.3241, + "step": 355 + }, + { + "epoch": 1.1009174311926606, + "grad_norm": 1.1514182090759277, + "learning_rate": 2.2558370702043535e-05, + "loss": 0.4066, + "step": 360 + }, + { + "epoch": 1.1162079510703364, + "grad_norm": 1.353812575340271, + "learning_rate": 2.233870129556946e-05, + "loss": 0.3243, + "step": 365 + }, + { + "epoch": 1.1314984709480123, + "grad_norm": 1.2885587215423584, + "learning_rate": 2.211694283685364e-05, + "loss": 0.3487, + "step": 370 + }, + { + "epoch": 1.146788990825688, + "grad_norm": 1.2002590894699097, + "learning_rate": 2.1893158452188696e-05, + "loss": 0.3561, + "step": 375 + }, + { + "epoch": 1.162079510703364, + "grad_norm": 1.4288114309310913, + "learning_rate": 2.166741184457214e-05, + "loss": 0.31, + "step": 380 + }, + { + "epoch": 1.1773700305810397, + "grad_norm": 1.216217279434204, + "learning_rate": 2.1439767275572558e-05, + "loss": 0.3283, + "step": 385 + }, + { + "epoch": 1.1926605504587156, + "grad_norm": 1.6451380252838135, + "learning_rate": 2.1210289547036784e-05, + "loss": 0.3307, + "step": 390 + }, + { + "epoch": 1.2079510703363914, + "grad_norm": 1.2695378065109253, + "learning_rate": 2.0979043982643194e-05, + "loss": 0.2929, + "step": 395 + }, + { + "epoch": 1.2232415902140672, + "grad_norm": 1.5951563119888306, + "learning_rate": 2.0746096409306568e-05, + "loss": 0.2976, + "step": 400 + }, + { + "epoch": 1.238532110091743, + "grad_norm": 1.22429358959198, + "learning_rate": 2.0511513138439597e-05, + "loss": 0.326, + "step": 405 + }, + { + "epoch": 1.2538226299694188, + "grad_norm": 1.4491422176361084, + "learning_rate": 2.0275360947076547e-05, + "loss": 0.3206, + "step": 410 + }, + { + "epoch": 1.2691131498470947, + "grad_norm": 1.2904996871948242, + "learning_rate": 2.0037707058864343e-05, + "loss": 0.2911, + "step": 415 + }, + { + "epoch": 1.2844036697247707, + "grad_norm": 1.261361837387085, + "learning_rate": 1.979861912492651e-05, + "loss": 0.2621, + "step": 420 + }, + { + "epoch": 1.2996941896024465, + "grad_norm": 1.3700158596038818, + "learning_rate": 1.9558165204605473e-05, + "loss": 0.3154, + "step": 425 + }, + { + "epoch": 1.3149847094801224, + "grad_norm": 1.2652907371520996, + "learning_rate": 1.9316413746088594e-05, + "loss": 0.3443, + "step": 430 + }, + { + "epoch": 1.3302752293577982, + "grad_norm": 1.3626536130905151, + "learning_rate": 1.907343356692356e-05, + "loss": 0.3039, + "step": 435 + }, + { + "epoch": 1.345565749235474, + "grad_norm": 1.3984373807907104, + "learning_rate": 1.8829293834428606e-05, + "loss": 0.2991, + "step": 440 + }, + { + "epoch": 1.3608562691131498, + "grad_norm": 1.2593680620193481, + "learning_rate": 1.858406404600319e-05, + "loss": 0.2651, + "step": 445 + }, + { + "epoch": 1.3761467889908257, + "grad_norm": 1.2255761623382568, + "learning_rate": 1.8337814009344716e-05, + "loss": 0.2715, + "step": 450 + }, + { + "epoch": 1.3914373088685015, + "grad_norm": 1.2626287937164307, + "learning_rate": 1.8090613822576858e-05, + "loss": 0.2529, + "step": 455 + }, + { + "epoch": 1.4067278287461773, + "grad_norm": 1.2186583280563354, + "learning_rate": 1.7842533854295294e-05, + "loss": 0.2198, + "step": 460 + }, + { + "epoch": 1.4220183486238533, + "grad_norm": 1.2559664249420166, + "learning_rate": 1.759364472353643e-05, + "loss": 0.3028, + "step": 465 + }, + { + "epoch": 1.4373088685015292, + "grad_norm": 1.3364081382751465, + "learning_rate": 1.7344017279674774e-05, + "loss": 0.2385, + "step": 470 + }, + { + "epoch": 1.452599388379205, + "grad_norm": 1.2114051580429077, + "learning_rate": 1.709372258225482e-05, + "loss": 0.2672, + "step": 475 + }, + { + "epoch": 1.4678899082568808, + "grad_norm": 1.2229247093200684, + "learning_rate": 1.684283188076304e-05, + "loss": 0.2722, + "step": 480 + }, + { + "epoch": 1.4831804281345566, + "grad_norm": 1.1687208414077759, + "learning_rate": 1.659141659434587e-05, + "loss": 0.2129, + "step": 485 + }, + { + "epoch": 1.4984709480122325, + "grad_norm": 1.5788482427597046, + "learning_rate": 1.633954829147932e-05, + "loss": 0.2472, + "step": 490 + }, + { + "epoch": 1.5137614678899083, + "grad_norm": 1.2113409042358398, + "learning_rate": 1.6087298669596162e-05, + "loss": 0.2368, + "step": 495 + }, + { + "epoch": 1.529051987767584, + "grad_norm": 1.3984663486480713, + "learning_rate": 1.583473953467634e-05, + "loss": 0.2037, + "step": 500 + }, + { + "epoch": 1.54434250764526, + "grad_norm": 1.4378868341445923, + "learning_rate": 1.558194278080652e-05, + "loss": 0.2349, + "step": 505 + }, + { + "epoch": 1.5596330275229358, + "grad_norm": 1.3355070352554321, + "learning_rate": 1.5328980369714554e-05, + "loss": 0.2042, + "step": 510 + }, + { + "epoch": 1.5749235474006116, + "grad_norm": 1.3526110649108887, + "learning_rate": 1.507592431028466e-05, + "loss": 0.2068, + "step": 515 + }, + { + "epoch": 1.5902140672782874, + "grad_norm": 1.2498570680618286, + "learning_rate": 1.4822846638059234e-05, + "loss": 0.2002, + "step": 520 + }, + { + "epoch": 1.6055045871559632, + "grad_norm": 1.2077316045761108, + "learning_rate": 1.4569819394733004e-05, + "loss": 0.2307, + "step": 525 + }, + { + "epoch": 1.620795107033639, + "grad_norm": 1.679932951927185, + "learning_rate": 1.4316914607645509e-05, + "loss": 0.1885, + "step": 530 + }, + { + "epoch": 1.6360856269113149, + "grad_norm": 1.1754944324493408, + "learning_rate": 1.4064204269277572e-05, + "loss": 0.1983, + "step": 535 + }, + { + "epoch": 1.6513761467889907, + "grad_norm": 1.3387905359268188, + "learning_rate": 1.3811760316757804e-05, + "loss": 0.1913, + "step": 540 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 1.0718806982040405, + "learning_rate": 1.3559654611384774e-05, + "loss": 0.1767, + "step": 545 + }, + { + "epoch": 1.6819571865443423, + "grad_norm": 1.2957481145858765, + "learning_rate": 1.330795891817082e-05, + "loss": 0.1771, + "step": 550 + }, + { + "epoch": 1.6972477064220184, + "grad_norm": 1.2622219324111938, + "learning_rate": 1.3056744885413216e-05, + "loss": 0.1905, + "step": 555 + }, + { + "epoch": 1.7125382262996942, + "grad_norm": 1.4994255304336548, + "learning_rate": 1.2806084024298648e-05, + "loss": 0.1807, + "step": 560 + }, + { + "epoch": 1.72782874617737, + "grad_norm": 1.226175308227539, + "learning_rate": 1.2556047688546622e-05, + "loss": 0.1838, + "step": 565 + }, + { + "epoch": 1.7431192660550459, + "grad_norm": 1.5730829238891602, + "learning_rate": 1.230670705409777e-05, + "loss": 0.1815, + "step": 570 + }, + { + "epoch": 1.7584097859327217, + "grad_norm": 1.1735397577285767, + "learning_rate": 1.2058133098852735e-05, + "loss": 0.1554, + "step": 575 + }, + { + "epoch": 1.7737003058103975, + "grad_norm": 1.179861068725586, + "learning_rate": 1.1810396582467407e-05, + "loss": 0.1946, + "step": 580 + }, + { + "epoch": 1.7889908256880735, + "grad_norm": 1.304221272468567, + "learning_rate": 1.156356802621035e-05, + "loss": 0.1858, + "step": 585 + }, + { + "epoch": 1.8042813455657494, + "grad_norm": 1.2174237966537476, + "learning_rate": 1.1317717692888014e-05, + "loss": 0.2025, + "step": 590 + }, + { + "epoch": 1.8195718654434252, + "grad_norm": 1.135053038597107, + "learning_rate": 1.1072915566843582e-05, + "loss": 0.1803, + "step": 595 + }, + { + "epoch": 1.834862385321101, + "grad_norm": 1.1694585084915161, + "learning_rate": 1.0829231334035052e-05, + "loss": 0.1777, + "step": 600 + }, + { + "epoch": 1.8501529051987768, + "grad_norm": 1.0292987823486328, + "learning_rate": 1.0586734362198242e-05, + "loss": 0.1491, + "step": 605 + }, + { + "epoch": 1.8654434250764527, + "grad_norm": 1.2495766878128052, + "learning_rate": 1.0345493681100449e-05, + "loss": 0.1528, + "step": 610 + }, + { + "epoch": 1.8807339449541285, + "grad_norm": 1.046033501625061, + "learning_rate": 1.0105577962890235e-05, + "loss": 0.1552, + "step": 615 + }, + { + "epoch": 1.8960244648318043, + "grad_norm": 1.1253432035446167, + "learning_rate": 9.867055502549072e-06, + "loss": 0.1557, + "step": 620 + }, + { + "epoch": 1.9113149847094801, + "grad_norm": 1.5419331789016724, + "learning_rate": 9.629994198450305e-06, + "loss": 0.1554, + "step": 625 + }, + { + "epoch": 1.926605504587156, + "grad_norm": 1.0265806913375854, + "learning_rate": 9.394461533031059e-06, + "loss": 0.1586, + "step": 630 + }, + { + "epoch": 1.9418960244648318, + "grad_norm": 1.1318252086639404, + "learning_rate": 9.160524553582518e-06, + "loss": 0.1411, + "step": 635 + }, + { + "epoch": 1.9571865443425076, + "grad_norm": 1.1243185997009277, + "learning_rate": 8.928249853164093e-06, + "loss": 0.1404, + "step": 640 + }, + { + "epoch": 1.9724770642201834, + "grad_norm": 1.053444504737854, + "learning_rate": 8.697703551646874e-06, + "loss": 0.1399, + "step": 645 + }, + { + "epoch": 1.9877675840978593, + "grad_norm": 1.0632541179656982, + "learning_rate": 8.468951276891778e-06, + "loss": 0.1364, + "step": 650 + }, + { + "epoch": 2.003058103975535, + "grad_norm": 0.9375919103622437, + "learning_rate": 8.242058146067796e-06, + "loss": 0.1383, + "step": 655 + }, + { + "epoch": 2.018348623853211, + "grad_norm": 1.1458889245986938, + "learning_rate": 8.017088747115554e-06, + "loss": 0.124, + "step": 660 + }, + { + "epoch": 2.0336391437308867, + "grad_norm": 1.0627604722976685, + "learning_rate": 7.794107120361572e-06, + "loss": 0.1068, + "step": 665 + }, + { + "epoch": 2.0489296636085625, + "grad_norm": 0.9701297283172607, + "learning_rate": 7.573176740288397e-06, + "loss": 0.1147, + "step": 670 + }, + { + "epoch": 2.0642201834862384, + "grad_norm": 1.0277825593948364, + "learning_rate": 7.354360497465774e-06, + "loss": 0.1125, + "step": 675 + }, + { + "epoch": 2.079510703363914, + "grad_norm": 1.3792563676834106, + "learning_rate": 7.137720680648107e-06, + "loss": 0.1164, + "step": 680 + }, + { + "epoch": 2.09480122324159, + "grad_norm": 0.8796865344047546, + "learning_rate": 6.923318959043171e-06, + "loss": 0.107, + "step": 685 + }, + { + "epoch": 2.1100917431192663, + "grad_norm": 0.8038296103477478, + "learning_rate": 6.7112163647572e-06, + "loss": 0.1033, + "step": 690 + }, + { + "epoch": 2.1253822629969417, + "grad_norm": 1.1331510543823242, + "learning_rate": 6.50147327542137e-06, + "loss": 0.1104, + "step": 695 + }, + { + "epoch": 2.140672782874618, + "grad_norm": 1.123464584350586, + "learning_rate": 6.294149397004524e-06, + "loss": 0.1102, + "step": 700 + }, + { + "epoch": 2.1559633027522938, + "grad_norm": 0.915915846824646, + "learning_rate": 6.089303746817157e-06, + "loss": 0.1083, + "step": 705 + }, + { + "epoch": 2.1712538226299696, + "grad_norm": 1.0867143869400024, + "learning_rate": 5.886994636711396e-06, + "loss": 0.0992, + "step": 710 + }, + { + "epoch": 2.1865443425076454, + "grad_norm": 0.9166132807731628, + "learning_rate": 5.687279656481812e-06, + "loss": 0.099, + "step": 715 + }, + { + "epoch": 2.2018348623853212, + "grad_norm": 1.3393880128860474, + "learning_rate": 5.490215657471752e-06, + "loss": 0.0987, + "step": 720 + }, + { + "epoch": 2.217125382262997, + "grad_norm": 1.0100187063217163, + "learning_rate": 5.295858736389932e-06, + "loss": 0.0912, + "step": 725 + }, + { + "epoch": 2.232415902140673, + "grad_norm": 0.9319810271263123, + "learning_rate": 5.104264219341793e-06, + "loss": 0.0905, + "step": 730 + }, + { + "epoch": 2.2477064220183487, + "grad_norm": 0.9899811148643494, + "learning_rate": 4.915486646080247e-06, + "loss": 0.0973, + "step": 735 + }, + { + "epoch": 2.2629969418960245, + "grad_norm": 0.980066180229187, + "learning_rate": 4.729579754480262e-06, + "loss": 0.1004, + "step": 740 + }, + { + "epoch": 2.2782874617737003, + "grad_norm": 0.7877005934715271, + "learning_rate": 4.54659646524169e-06, + "loss": 0.0905, + "step": 745 + }, + { + "epoch": 2.293577981651376, + "grad_norm": 0.9241505265235901, + "learning_rate": 4.366588866824769e-06, + "loss": 0.0989, + "step": 750 + }, + { + "epoch": 2.308868501529052, + "grad_norm": 0.8367293477058411, + "learning_rate": 4.1896082006224716e-06, + "loss": 0.0993, + "step": 755 + }, + { + "epoch": 2.324159021406728, + "grad_norm": 0.8464824557304382, + "learning_rate": 4.015704846374018e-06, + "loss": 0.0845, + "step": 760 + }, + { + "epoch": 2.3394495412844036, + "grad_norm": 1.1053506135940552, + "learning_rate": 3.844928307823655e-06, + "loss": 0.0905, + "step": 765 + }, + { + "epoch": 2.3547400611620795, + "grad_norm": 0.7858840823173523, + "learning_rate": 3.6773271986287993e-06, + "loss": 0.0889, + "step": 770 + }, + { + "epoch": 2.3700305810397553, + "grad_norm": 1.0228829383850098, + "learning_rate": 3.5129492285215713e-06, + "loss": 0.0892, + "step": 775 + }, + { + "epoch": 2.385321100917431, + "grad_norm": 0.9756034016609192, + "learning_rate": 3.3518411897276064e-06, + "loss": 0.0828, + "step": 780 + }, + { + "epoch": 2.400611620795107, + "grad_norm": 0.8147797584533691, + "learning_rate": 3.194048943646092e-06, + "loss": 0.076, + "step": 785 + }, + { + "epoch": 2.4159021406727827, + "grad_norm": 0.8570481538772583, + "learning_rate": 3.0396174077947416e-06, + "loss": 0.0971, + "step": 790 + }, + { + "epoch": 2.4311926605504586, + "grad_norm": 0.8327272534370422, + "learning_rate": 2.8885905430234504e-06, + "loss": 0.0899, + "step": 795 + }, + { + "epoch": 2.4464831804281344, + "grad_norm": 0.8355672955513, + "learning_rate": 2.74101134100033e-06, + "loss": 0.0918, + "step": 800 + }, + { + "epoch": 2.46177370030581, + "grad_norm": 0.8298751711845398, + "learning_rate": 2.5969218119735705e-06, + "loss": 0.087, + "step": 805 + }, + { + "epoch": 2.477064220183486, + "grad_norm": 0.8912408351898193, + "learning_rate": 2.456362972812714e-06, + "loss": 0.1117, + "step": 810 + }, + { + "epoch": 2.4923547400611623, + "grad_norm": 0.8181519508361816, + "learning_rate": 2.3193748353326786e-06, + "loss": 0.0736, + "step": 815 + }, + { + "epoch": 2.5076452599388377, + "grad_norm": 0.9035735726356506, + "learning_rate": 2.1859963949039258e-06, + "loss": 0.1009, + "step": 820 + }, + { + "epoch": 2.522935779816514, + "grad_norm": 0.8128873109817505, + "learning_rate": 2.0562656193519287e-06, + "loss": 0.0758, + "step": 825 + }, + { + "epoch": 2.5382262996941893, + "grad_norm": 0.7081366181373596, + "learning_rate": 1.930219438149159e-06, + "loss": 0.0806, + "step": 830 + }, + { + "epoch": 2.5535168195718656, + "grad_norm": 0.9195858240127563, + "learning_rate": 1.8078937319026655e-06, + "loss": 0.0873, + "step": 835 + }, + { + "epoch": 2.5688073394495414, + "grad_norm": 0.6684081554412842, + "learning_rate": 1.6893233221401966e-06, + "loss": 0.0861, + "step": 840 + }, + { + "epoch": 2.5840978593272173, + "grad_norm": 0.5922194123268127, + "learning_rate": 1.5745419613978463e-06, + "loss": 0.0869, + "step": 845 + }, + { + "epoch": 2.599388379204893, + "grad_norm": 0.7503398060798645, + "learning_rate": 1.4635823236119544e-06, + "loss": 0.0852, + "step": 850 + }, + { + "epoch": 2.614678899082569, + "grad_norm": 0.8443646430969238, + "learning_rate": 1.3564759948180816e-06, + "loss": 0.0824, + "step": 855 + }, + { + "epoch": 2.6299694189602447, + "grad_norm": 0.8841539025306702, + "learning_rate": 1.2532534641596345e-06, + "loss": 0.0828, + "step": 860 + }, + { + "epoch": 2.6452599388379205, + "grad_norm": 0.6965765357017517, + "learning_rate": 1.1539441152087783e-06, + "loss": 0.0714, + "step": 865 + }, + { + "epoch": 2.6605504587155964, + "grad_norm": 0.8007877469062805, + "learning_rate": 1.0585762176020148e-06, + "loss": 0.0888, + "step": 870 + }, + { + "epoch": 2.675840978593272, + "grad_norm": 0.6825544834136963, + "learning_rate": 9.67176918992894e-07, + "loss": 0.0731, + "step": 875 + }, + { + "epoch": 2.691131498470948, + "grad_norm": 0.6929968595504761, + "learning_rate": 8.797722373240985e-07, + "loss": 0.0863, + "step": 880 + }, + { + "epoch": 2.706422018348624, + "grad_norm": 0.8100355863571167, + "learning_rate": 7.963870534211093e-07, + "loss": 0.0851, + "step": 885 + }, + { + "epoch": 2.7217125382262997, + "grad_norm": 0.5955526232719421, + "learning_rate": 7.170451039095649e-07, + "loss": 0.0783, + "step": 890 + }, + { + "epoch": 2.7370030581039755, + "grad_norm": 0.7148454189300537, + "learning_rate": 6.417689744583504e-07, + "loss": 0.0732, + "step": 895 + }, + { + "epoch": 2.7522935779816513, + "grad_norm": 0.6212884187698364, + "learning_rate": 5.705800933502974e-07, + "loss": 0.076, + "step": 900 + }, + { + "epoch": 2.767584097859327, + "grad_norm": 0.6064401865005493, + "learning_rate": 5.034987253823614e-07, + "loss": 0.065, + "step": 905 + }, + { + "epoch": 2.782874617737003, + "grad_norm": 0.7183969616889954, + "learning_rate": 4.405439660969929e-07, + "loss": 0.0757, + "step": 910 + }, + { + "epoch": 2.7981651376146788, + "grad_norm": 0.9460102915763855, + "learning_rate": 3.8173373634635745e-07, + "loss": 0.0819, + "step": 915 + }, + { + "epoch": 2.8134556574923546, + "grad_norm": 0.8076668381690979, + "learning_rate": 3.270847771909463e-07, + "loss": 0.0892, + "step": 920 + }, + { + "epoch": 2.8287461773700304, + "grad_norm": 0.685032069683075, + "learning_rate": 2.766126451340184e-07, + "loss": 0.0827, + "step": 925 + }, + { + "epoch": 2.8440366972477067, + "grad_norm": 0.7841586470603943, + "learning_rate": 2.3033170769325763e-07, + "loss": 0.071, + "step": 930 + }, + { + "epoch": 2.859327217125382, + "grad_norm": 0.7632861137390137, + "learning_rate": 1.8825513931086947e-07, + "loss": 0.0849, + "step": 935 + }, + { + "epoch": 2.8746177370030583, + "grad_norm": 0.6218641996383667, + "learning_rate": 1.503949176033259e-07, + "loss": 0.0703, + "step": 940 + }, + { + "epoch": 2.8899082568807337, + "grad_norm": 0.7408595085144043, + "learning_rate": 1.1676181995177781e-07, + "loss": 0.0792, + "step": 945 + }, + { + "epoch": 2.90519877675841, + "grad_norm": 0.7664374709129333, + "learning_rate": 8.73654204341473e-08, + "loss": 0.0882, + "step": 950 + }, + { + "epoch": 2.9204892966360854, + "grad_norm": 0.6583572626113892, + "learning_rate": 6.221408709974619e-08, + "loss": 0.0904, + "step": 955 + }, + { + "epoch": 2.9357798165137616, + "grad_norm": 0.5235804915428162, + "learning_rate": 4.131497958720143e-08, + "loss": 0.0685, + "step": 960 + }, + { + "epoch": 2.9510703363914375, + "grad_norm": 0.6285778880119324, + "learning_rate": 2.4674047086383767e-08, + "loss": 0.0702, + "step": 965 + }, + { + "epoch": 2.9663608562691133, + "grad_norm": 0.6689122319221497, + "learning_rate": 1.2296026644890713e-08, + "loss": 0.0785, + "step": 970 + }, + { + "epoch": 2.981651376146789, + "grad_norm": 0.6397746205329895, + "learning_rate": 4.184441819588547e-09, + "loss": 0.0776, + "step": 975 + }, + { + "epoch": 2.996941896024465, + "grad_norm": 0.6537074446678162, + "learning_rate": 3.416016735929839e-10, + "loss": 0.0588, + "step": 980 + }, + { + "epoch": 3.0, + "step": 981, + "total_flos": 1.225514319484027e+18, + "train_loss": 0.4207725525358037, + "train_runtime": 994.6554, + "train_samples_per_second": 31.558, + "train_steps_per_second": 0.986 + } + ], + "logging_steps": 5, + "max_steps": 981, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.225514319484027e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/training_args.bin b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b29915a3b24176f47fafd66926e42afb3632a560 --- /dev/null +++ b/hotpotqa_test_knowledge_50_instruct/5_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4e77b65c659d98f936005911412ac35117351d4b874a87a79c0108ba473b8e +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e3c455d01ec696e1133c5f20af84a1dcfea9a3e4 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..408ef58f3ac8523c6bf83c0219e8f8058ab6644f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "v_proj", + "down_proj", + "q_proj", + "o_proj", + "gate_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0558553d4af470a2a7d2b429e5891152c11bef3f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c5b77b28c472d630c74c29e0c30b8d55939d72136df18fd4718f1064d8b64d +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3fb893f357c9951ca4376d6246d43b8408d765aa --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2487673941600502e+18, + "train_loss": 0.41707510350827587, + "train_runtime": 1012.4706, + "train_samples": 10926, + "train_samples_per_second": 32.374, + "train_steps_per_second": 1.013 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3fb893f357c9951ca4376d6246d43b8408d765aa --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2487673941600502e+18, + "train_loss": 0.41707510350827587, + "train_runtime": 1012.4706, + "train_samples": 10926, + "train_samples_per_second": 32.374, + "train_steps_per_second": 1.013 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a13444592f17b1e2942ca48ddbc78d5affcddc8a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1478 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1026, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014641288433382138, + "grad_norm": 0.536267876625061, + "learning_rate": 2.307692307692308e-06, + "loss": 1.6317, + "step": 5 + }, + { + "epoch": 0.029282576866764276, + "grad_norm": 0.7280184626579285, + "learning_rate": 5.192307692307692e-06, + "loss": 1.6272, + "step": 10 + }, + { + "epoch": 0.043923865300146414, + "grad_norm": 0.5147353410720825, + "learning_rate": 8.076923076923077e-06, + "loss": 1.631, + "step": 15 + }, + { + "epoch": 0.05856515373352855, + "grad_norm": 0.5578590035438538, + "learning_rate": 1.0961538461538462e-05, + "loss": 1.5653, + "step": 20 + }, + { + "epoch": 0.07320644216691069, + "grad_norm": 0.5893052816390991, + "learning_rate": 1.3846153846153847e-05, + "loss": 1.5665, + "step": 25 + }, + { + "epoch": 0.08784773060029283, + "grad_norm": 0.5500059723854065, + "learning_rate": 1.673076923076923e-05, + "loss": 1.5776, + "step": 30 + }, + { + "epoch": 0.10248901903367497, + "grad_norm": 0.4985197186470032, + "learning_rate": 1.9615384615384617e-05, + "loss": 1.5604, + "step": 35 + }, + { + "epoch": 0.1171303074670571, + "grad_norm": 0.5037055015563965, + "learning_rate": 2.25e-05, + "loss": 1.4437, + "step": 40 + }, + { + "epoch": 0.13177159590043924, + "grad_norm": 0.5805780291557312, + "learning_rate": 2.5384615384615386e-05, + "loss": 1.4675, + "step": 45 + }, + { + "epoch": 0.14641288433382138, + "grad_norm": 0.5513816475868225, + "learning_rate": 2.8269230769230768e-05, + "loss": 1.5254, + "step": 50 + }, + { + "epoch": 0.16105417276720352, + "grad_norm": 0.5438980460166931, + "learning_rate": 2.9999687894386234e-05, + "loss": 1.422, + "step": 55 + }, + { + "epoch": 0.17569546120058566, + "grad_norm": 0.7122054696083069, + "learning_rate": 2.9996176855388602e-05, + "loss": 1.3692, + "step": 60 + }, + { + "epoch": 0.1903367496339678, + "grad_norm": 0.6227021217346191, + "learning_rate": 2.9988765561582723e-05, + "loss": 1.3425, + "step": 65 + }, + { + "epoch": 0.20497803806734993, + "grad_norm": 0.7129076719284058, + "learning_rate": 2.9977455940522168e-05, + "loss": 1.2913, + "step": 70 + }, + { + "epoch": 0.21961932650073207, + "grad_norm": 0.6445136666297913, + "learning_rate": 2.9962250933650244e-05, + "loss": 1.2596, + "step": 75 + }, + { + "epoch": 0.2342606149341142, + "grad_norm": 0.6235454678535461, + "learning_rate": 2.9943154495534944e-05, + "loss": 1.2597, + "step": 80 + }, + { + "epoch": 0.24890190336749635, + "grad_norm": 0.6740269064903259, + "learning_rate": 2.992017159284047e-05, + "loss": 1.2576, + "step": 85 + }, + { + "epoch": 0.2635431918008785, + "grad_norm": 0.6807919144630432, + "learning_rate": 2.9893308203035447e-05, + "loss": 1.1791, + "step": 90 + }, + { + "epoch": 0.2781844802342606, + "grad_norm": 0.7509133219718933, + "learning_rate": 2.9862571312838302e-05, + "loss": 1.1236, + "step": 95 + }, + { + "epoch": 0.29282576866764276, + "grad_norm": 0.7634111046791077, + "learning_rate": 2.982796891640015e-05, + "loss": 1.127, + "step": 100 + }, + { + "epoch": 0.3074670571010249, + "grad_norm": 0.8660857677459717, + "learning_rate": 2.9789510013225617e-05, + "loss": 1.1025, + "step": 105 + }, + { + "epoch": 0.32210834553440704, + "grad_norm": 0.9041235446929932, + "learning_rate": 2.9747204605832248e-05, + "loss": 1.0744, + "step": 110 + }, + { + "epoch": 0.3367496339677892, + "grad_norm": 0.8559146523475647, + "learning_rate": 2.9701063697148998e-05, + "loss": 1.0632, + "step": 115 + }, + { + "epoch": 0.3513909224011713, + "grad_norm": 0.8041065335273743, + "learning_rate": 2.965109928765457e-05, + "loss": 1.0391, + "step": 120 + }, + { + "epoch": 0.36603221083455345, + "grad_norm": 0.8681470155715942, + "learning_rate": 2.9597324372256287e-05, + "loss": 0.996, + "step": 125 + }, + { + "epoch": 0.3806734992679356, + "grad_norm": 1.365595817565918, + "learning_rate": 2.953975293691032e-05, + "loss": 0.9988, + "step": 130 + }, + { + "epoch": 0.3953147877013177, + "grad_norm": 0.8418181538581848, + "learning_rate": 2.9478399954984196e-05, + "loss": 0.9759, + "step": 135 + }, + { + "epoch": 0.40995607613469986, + "grad_norm": 0.9229922294616699, + "learning_rate": 2.9413281383362467e-05, + "loss": 0.9182, + "step": 140 + }, + { + "epoch": 0.424597364568082, + "grad_norm": 0.9954320192337036, + "learning_rate": 2.9344414158296585e-05, + "loss": 0.9241, + "step": 145 + }, + { + "epoch": 0.43923865300146414, + "grad_norm": 1.0408070087432861, + "learning_rate": 2.9271816191000075e-05, + "loss": 0.8662, + "step": 150 + }, + { + "epoch": 0.4538799414348463, + "grad_norm": 1.0453754663467407, + "learning_rate": 2.9195506362990113e-05, + "loss": 0.944, + "step": 155 + }, + { + "epoch": 0.4685212298682284, + "grad_norm": 1.141532301902771, + "learning_rate": 2.91155045211768e-05, + "loss": 0.8409, + "step": 160 + }, + { + "epoch": 0.48316251830161056, + "grad_norm": 1.0432106256484985, + "learning_rate": 2.9031831472701248e-05, + "loss": 0.8311, + "step": 165 + }, + { + "epoch": 0.4978038067349927, + "grad_norm": 1.0892516374588013, + "learning_rate": 2.8944508979524044e-05, + "loss": 0.7917, + "step": 170 + }, + { + "epoch": 0.5124450951683748, + "grad_norm": 1.097086787223816, + "learning_rate": 2.885355975276531e-05, + "loss": 0.7709, + "step": 175 + }, + { + "epoch": 0.527086383601757, + "grad_norm": 1.2212247848510742, + "learning_rate": 2.8759007446797916e-05, + "loss": 0.828, + "step": 180 + }, + { + "epoch": 0.541727672035139, + "grad_norm": 1.0514442920684814, + "learning_rate": 2.8660876653095372e-05, + "loss": 0.7827, + "step": 185 + }, + { + "epoch": 0.5563689604685212, + "grad_norm": 1.0739250183105469, + "learning_rate": 2.8559192893836018e-05, + "loss": 0.736, + "step": 190 + }, + { + "epoch": 0.5710102489019033, + "grad_norm": 1.2314304113388062, + "learning_rate": 2.84539826152651e-05, + "loss": 0.701, + "step": 195 + }, + { + "epoch": 0.5856515373352855, + "grad_norm": 1.3804528713226318, + "learning_rate": 2.8345273180816564e-05, + "loss": 0.6882, + "step": 200 + }, + { + "epoch": 0.6002928257686676, + "grad_norm": 1.2130359411239624, + "learning_rate": 2.8233092863996294e-05, + "loss": 0.7702, + "step": 205 + }, + { + "epoch": 0.6149341142020498, + "grad_norm": 1.164991855621338, + "learning_rate": 2.811747084102862e-05, + "loss": 0.7527, + "step": 210 + }, + { + "epoch": 0.6295754026354319, + "grad_norm": 1.197800636291504, + "learning_rate": 2.7998437183268078e-05, + "loss": 0.6175, + "step": 215 + }, + { + "epoch": 0.6442166910688141, + "grad_norm": 1.49165940284729, + "learning_rate": 2.7876022849378377e-05, + "loss": 0.667, + "step": 220 + }, + { + "epoch": 0.6588579795021962, + "grad_norm": 1.301142692565918, + "learning_rate": 2.7750259677280527e-05, + "loss": 0.6773, + "step": 225 + }, + { + "epoch": 0.6734992679355783, + "grad_norm": 1.191139817237854, + "learning_rate": 2.7621180375872376e-05, + "loss": 0.6221, + "step": 230 + }, + { + "epoch": 0.6881405563689604, + "grad_norm": 1.2943156957626343, + "learning_rate": 2.7488818516521524e-05, + "loss": 0.6809, + "step": 235 + }, + { + "epoch": 0.7027818448023426, + "grad_norm": 1.2157233953475952, + "learning_rate": 2.7353208524334014e-05, + "loss": 0.609, + "step": 240 + }, + { + "epoch": 0.7174231332357247, + "grad_norm": 1.2683531045913696, + "learning_rate": 2.7214385669200905e-05, + "loss": 0.7012, + "step": 245 + }, + { + "epoch": 0.7320644216691069, + "grad_norm": 1.176612138748169, + "learning_rate": 2.707238605662518e-05, + "loss": 0.6283, + "step": 250 + }, + { + "epoch": 0.746705710102489, + "grad_norm": 1.146350383758545, + "learning_rate": 2.692724661833131e-05, + "loss": 0.5522, + "step": 255 + }, + { + "epoch": 0.7613469985358712, + "grad_norm": 1.5264819860458374, + "learning_rate": 2.677900510265993e-05, + "loss": 0.6015, + "step": 260 + }, + { + "epoch": 0.7759882869692533, + "grad_norm": 1.2768447399139404, + "learning_rate": 2.6627700064750115e-05, + "loss": 0.5314, + "step": 265 + }, + { + "epoch": 0.7906295754026355, + "grad_norm": 1.3723030090332031, + "learning_rate": 2.647337085651184e-05, + "loss": 0.5806, + "step": 270 + }, + { + "epoch": 0.8052708638360175, + "grad_norm": 1.326313853263855, + "learning_rate": 2.631605761639121e-05, + "loss": 0.5394, + "step": 275 + }, + { + "epoch": 0.8199121522693997, + "grad_norm": 1.1591931581497192, + "learning_rate": 2.6155801258931115e-05, + "loss": 0.5042, + "step": 280 + }, + { + "epoch": 0.8345534407027818, + "grad_norm": 1.2869375944137573, + "learning_rate": 2.5992643464130054e-05, + "loss": 0.5716, + "step": 285 + }, + { + "epoch": 0.849194729136164, + "grad_norm": 1.2068252563476562, + "learning_rate": 2.582662666660185e-05, + "loss": 0.5273, + "step": 290 + }, + { + "epoch": 0.8638360175695461, + "grad_norm": 1.336654782295227, + "learning_rate": 2.5657794044539114e-05, + "loss": 0.4989, + "step": 295 + }, + { + "epoch": 0.8784773060029283, + "grad_norm": 1.358249545097351, + "learning_rate": 2.5486189508483345e-05, + "loss": 0.5007, + "step": 300 + }, + { + "epoch": 0.8931185944363104, + "grad_norm": 1.3531965017318726, + "learning_rate": 2.5311857689904497e-05, + "loss": 0.4954, + "step": 305 + }, + { + "epoch": 0.9077598828696926, + "grad_norm": 1.3650383949279785, + "learning_rate": 2.5134843929593113e-05, + "loss": 0.4609, + "step": 310 + }, + { + "epoch": 0.9224011713030746, + "grad_norm": 1.318924903869629, + "learning_rate": 2.4955194265867916e-05, + "loss": 0.4917, + "step": 315 + }, + { + "epoch": 0.9370424597364568, + "grad_norm": 1.4329462051391602, + "learning_rate": 2.4772955422602032e-05, + "loss": 0.453, + "step": 320 + }, + { + "epoch": 0.9516837481698389, + "grad_norm": 1.123244285583496, + "learning_rate": 2.4588174797070883e-05, + "loss": 0.4994, + "step": 325 + }, + { + "epoch": 0.9663250366032211, + "grad_norm": 1.3039047718048096, + "learning_rate": 2.4400900447624955e-05, + "loss": 0.4547, + "step": 330 + }, + { + "epoch": 0.9809663250366032, + "grad_norm": 1.2344906330108643, + "learning_rate": 2.4211181081190606e-05, + "loss": 0.4958, + "step": 335 + }, + { + "epoch": 0.9956076134699854, + "grad_norm": 1.5799585580825806, + "learning_rate": 2.4019066040602245e-05, + "loss": 0.433, + "step": 340 + }, + { + "epoch": 1.0087847730600292, + "grad_norm": 1.2939101457595825, + "learning_rate": 2.3824605291769043e-05, + "loss": 0.3795, + "step": 345 + }, + { + "epoch": 1.0234260614934114, + "grad_norm": 1.2845896482467651, + "learning_rate": 2.3627849410679667e-05, + "loss": 0.3756, + "step": 350 + }, + { + "epoch": 1.0380673499267936, + "grad_norm": 1.2641115188598633, + "learning_rate": 2.3428849570248295e-05, + "loss": 0.3839, + "step": 355 + }, + { + "epoch": 1.0527086383601758, + "grad_norm": 1.323004126548767, + "learning_rate": 2.322765752700541e-05, + "loss": 0.3488, + "step": 360 + }, + { + "epoch": 1.0673499267935578, + "grad_norm": 1.2138251066207886, + "learning_rate": 2.3024325607636782e-05, + "loss": 0.3645, + "step": 365 + }, + { + "epoch": 1.08199121522694, + "grad_norm": 1.1692546606063843, + "learning_rate": 2.2818906695374164e-05, + "loss": 0.3423, + "step": 370 + }, + { + "epoch": 1.0966325036603222, + "grad_norm": 1.4843940734863281, + "learning_rate": 2.2611454216241273e-05, + "loss": 0.3389, + "step": 375 + }, + { + "epoch": 1.1112737920937041, + "grad_norm": 1.3055230379104614, + "learning_rate": 2.2402022125158564e-05, + "loss": 0.3528, + "step": 380 + }, + { + "epoch": 1.1259150805270863, + "grad_norm": 1.2806233167648315, + "learning_rate": 2.2190664891910426e-05, + "loss": 0.3273, + "step": 385 + }, + { + "epoch": 1.1405563689604685, + "grad_norm": 1.2707644701004028, + "learning_rate": 2.1977437486978562e-05, + "loss": 0.3603, + "step": 390 + }, + { + "epoch": 1.1551976573938507, + "grad_norm": 1.1437355279922485, + "learning_rate": 2.1762395367245036e-05, + "loss": 0.3175, + "step": 395 + }, + { + "epoch": 1.169838945827233, + "grad_norm": 1.3131906986236572, + "learning_rate": 2.1545594461568883e-05, + "loss": 0.3393, + "step": 400 + }, + { + "epoch": 1.1844802342606149, + "grad_norm": 1.1617120504379272, + "learning_rate": 2.1327091156239953e-05, + "loss": 0.3129, + "step": 405 + }, + { + "epoch": 1.199121522693997, + "grad_norm": 1.3708487749099731, + "learning_rate": 2.1106942280313778e-05, + "loss": 0.2795, + "step": 410 + }, + { + "epoch": 1.2137628111273793, + "grad_norm": 1.3035938739776611, + "learning_rate": 2.0885205090831275e-05, + "loss": 0.319, + "step": 415 + }, + { + "epoch": 1.2284040995607612, + "grad_norm": 1.3703004121780396, + "learning_rate": 2.0661937257927164e-05, + "loss": 0.3333, + "step": 420 + }, + { + "epoch": 1.2430453879941434, + "grad_norm": 1.3056800365447998, + "learning_rate": 2.0437196849830908e-05, + "loss": 0.3031, + "step": 425 + }, + { + "epoch": 1.2576866764275256, + "grad_norm": 1.1743454933166504, + "learning_rate": 2.0211042317764158e-05, + "loss": 0.2994, + "step": 430 + }, + { + "epoch": 1.2723279648609078, + "grad_norm": 1.2546159029006958, + "learning_rate": 1.9983532480738567e-05, + "loss": 0.2909, + "step": 435 + }, + { + "epoch": 1.28696925329429, + "grad_norm": 1.2477736473083496, + "learning_rate": 1.975472651025793e-05, + "loss": 0.3046, + "step": 440 + }, + { + "epoch": 1.301610541727672, + "grad_norm": 1.5188379287719727, + "learning_rate": 1.952468391492868e-05, + "loss": 0.313, + "step": 445 + }, + { + "epoch": 1.3162518301610542, + "grad_norm": 1.1610665321350098, + "learning_rate": 1.9293464524982695e-05, + "loss": 0.3168, + "step": 450 + }, + { + "epoch": 1.3308931185944364, + "grad_norm": 1.2621536254882812, + "learning_rate": 1.9061128476716454e-05, + "loss": 0.2717, + "step": 455 + }, + { + "epoch": 1.3455344070278183, + "grad_norm": 1.1992082595825195, + "learning_rate": 1.8827736196850594e-05, + "loss": 0.2597, + "step": 460 + }, + { + "epoch": 1.3601756954612005, + "grad_norm": 1.3697268962860107, + "learning_rate": 1.8593348386813908e-05, + "loss": 0.3139, + "step": 465 + }, + { + "epoch": 1.3748169838945827, + "grad_norm": 1.1437841653823853, + "learning_rate": 1.8358026006955967e-05, + "loss": 0.258, + "step": 470 + }, + { + "epoch": 1.389458272327965, + "grad_norm": 1.3166323900222778, + "learning_rate": 1.8121830260692294e-05, + "loss": 0.272, + "step": 475 + }, + { + "epoch": 1.4040995607613471, + "grad_norm": 1.1545346975326538, + "learning_rate": 1.7884822578586426e-05, + "loss": 0.227, + "step": 480 + }, + { + "epoch": 1.418740849194729, + "grad_norm": 1.1189030408859253, + "learning_rate": 1.7647064602372828e-05, + "loss": 0.2568, + "step": 485 + }, + { + "epoch": 1.4333821376281113, + "grad_norm": 1.2730752229690552, + "learning_rate": 1.7408618168924913e-05, + "loss": 0.2727, + "step": 490 + }, + { + "epoch": 1.4480234260614935, + "grad_norm": 1.2722442150115967, + "learning_rate": 1.7169545294172315e-05, + "loss": 0.2806, + "step": 495 + }, + { + "epoch": 1.4626647144948755, + "grad_norm": 1.174206018447876, + "learning_rate": 1.6929908156971567e-05, + "loss": 0.2639, + "step": 500 + }, + { + "epoch": 1.4773060029282576, + "grad_norm": 1.2045493125915527, + "learning_rate": 1.668976908293443e-05, + "loss": 0.2556, + "step": 505 + }, + { + "epoch": 1.4919472913616398, + "grad_norm": 1.4125105142593384, + "learning_rate": 1.644919052821805e-05, + "loss": 0.2279, + "step": 510 + }, + { + "epoch": 1.506588579795022, + "grad_norm": 1.1820182800292969, + "learning_rate": 1.620823506328113e-05, + "loss": 0.2343, + "step": 515 + }, + { + "epoch": 1.5212298682284042, + "grad_norm": 1.514039158821106, + "learning_rate": 1.5966965356610438e-05, + "loss": 0.2375, + "step": 520 + }, + { + "epoch": 1.5358711566617862, + "grad_norm": 1.46183443069458, + "learning_rate": 1.5725444158421738e-05, + "loss": 0.2446, + "step": 525 + }, + { + "epoch": 1.5505124450951684, + "grad_norm": 1.0905009508132935, + "learning_rate": 1.5483734284339564e-05, + "loss": 0.2419, + "step": 530 + }, + { + "epoch": 1.5651537335285504, + "grad_norm": 1.2580227851867676, + "learning_rate": 1.524189859905987e-05, + "loss": 0.2288, + "step": 535 + }, + { + "epoch": 1.5797950219619326, + "grad_norm": 1.3676029443740845, + "learning_rate": 1.5e-05, + "loss": 0.2501, + "step": 540 + }, + { + "epoch": 1.5944363103953147, + "grad_norm": 1.1293681859970093, + "learning_rate": 1.4758101400940131e-05, + "loss": 0.2312, + "step": 545 + }, + { + "epoch": 1.609077598828697, + "grad_norm": 1.223874568939209, + "learning_rate": 1.4516265715660439e-05, + "loss": 0.2262, + "step": 550 + }, + { + "epoch": 1.6237188872620791, + "grad_norm": 1.2107800245285034, + "learning_rate": 1.427455584157826e-05, + "loss": 0.2292, + "step": 555 + }, + { + "epoch": 1.6383601756954613, + "grad_norm": 1.1685823202133179, + "learning_rate": 1.4033034643389571e-05, + "loss": 0.2096, + "step": 560 + }, + { + "epoch": 1.6530014641288433, + "grad_norm": 1.1717355251312256, + "learning_rate": 1.3791764936718871e-05, + "loss": 0.1925, + "step": 565 + }, + { + "epoch": 1.6676427525622255, + "grad_norm": 1.2912267446517944, + "learning_rate": 1.3550809471781956e-05, + "loss": 0.1873, + "step": 570 + }, + { + "epoch": 1.6822840409956075, + "grad_norm": 1.255232810974121, + "learning_rate": 1.3310230917065569e-05, + "loss": 0.2349, + "step": 575 + }, + { + "epoch": 1.6969253294289897, + "grad_norm": 1.2949752807617188, + "learning_rate": 1.3070091843028436e-05, + "loss": 0.2033, + "step": 580 + }, + { + "epoch": 1.7115666178623719, + "grad_norm": 1.2112230062484741, + "learning_rate": 1.2830454705827687e-05, + "loss": 0.2119, + "step": 585 + }, + { + "epoch": 1.726207906295754, + "grad_norm": 1.2727748155593872, + "learning_rate": 1.2591381831075091e-05, + "loss": 0.1957, + "step": 590 + }, + { + "epoch": 1.7408491947291362, + "grad_norm": 1.1411067247390747, + "learning_rate": 1.2352935397627178e-05, + "loss": 0.186, + "step": 595 + }, + { + "epoch": 1.7554904831625184, + "grad_norm": 1.1610162258148193, + "learning_rate": 1.211517742141358e-05, + "loss": 0.1878, + "step": 600 + }, + { + "epoch": 1.7701317715959004, + "grad_norm": 1.2632339000701904, + "learning_rate": 1.187816973930771e-05, + "loss": 0.1754, + "step": 605 + }, + { + "epoch": 1.7847730600292826, + "grad_norm": 1.1478899717330933, + "learning_rate": 1.1641973993044039e-05, + "loss": 0.1655, + "step": 610 + }, + { + "epoch": 1.7994143484626646, + "grad_norm": 1.2402925491333008, + "learning_rate": 1.1406651613186092e-05, + "loss": 0.1828, + "step": 615 + }, + { + "epoch": 1.8140556368960468, + "grad_norm": 1.1366699934005737, + "learning_rate": 1.117226380314941e-05, + "loss": 0.1498, + "step": 620 + }, + { + "epoch": 1.828696925329429, + "grad_norm": 1.2412633895874023, + "learning_rate": 1.0938871523283545e-05, + "loss": 0.1689, + "step": 625 + }, + { + "epoch": 1.8433382137628112, + "grad_norm": 1.2420597076416016, + "learning_rate": 1.0706535475017309e-05, + "loss": 0.1674, + "step": 630 + }, + { + "epoch": 1.8579795021961933, + "grad_norm": 1.34257173538208, + "learning_rate": 1.0475316085071322e-05, + "loss": 0.1922, + "step": 635 + }, + { + "epoch": 1.8726207906295755, + "grad_norm": 1.1188215017318726, + "learning_rate": 1.0245273489742078e-05, + "loss": 0.1533, + "step": 640 + }, + { + "epoch": 1.8872620790629575, + "grad_norm": 1.5311890840530396, + "learning_rate": 1.0016467519261434e-05, + "loss": 0.1566, + "step": 645 + }, + { + "epoch": 1.9019033674963397, + "grad_norm": 1.235378384590149, + "learning_rate": 9.788957682235845e-06, + "loss": 0.159, + "step": 650 + }, + { + "epoch": 1.9165446559297217, + "grad_norm": 1.3967939615249634, + "learning_rate": 9.562803150169093e-06, + "loss": 0.1702, + "step": 655 + }, + { + "epoch": 1.9311859443631039, + "grad_norm": 1.1832576990127563, + "learning_rate": 9.338062742072837e-06, + "loss": 0.1364, + "step": 660 + }, + { + "epoch": 1.945827232796486, + "grad_norm": 1.2193589210510254, + "learning_rate": 9.114794909168728e-06, + "loss": 0.1502, + "step": 665 + }, + { + "epoch": 1.9604685212298683, + "grad_norm": 1.2518123388290405, + "learning_rate": 8.893057719686225e-06, + "loss": 0.1626, + "step": 670 + }, + { + "epoch": 1.9751098096632504, + "grad_norm": 1.0509623289108276, + "learning_rate": 8.672908843760047e-06, + "loss": 0.1371, + "step": 675 + }, + { + "epoch": 1.9897510980966326, + "grad_norm": 1.0702646970748901, + "learning_rate": 8.454405538431125e-06, + "loss": 0.1511, + "step": 680 + }, + { + "epoch": 2.0029282576866763, + "grad_norm": 0.9390521049499512, + "learning_rate": 8.237604632754968e-06, + "loss": 0.1224, + "step": 685 + }, + { + "epoch": 2.0175695461200585, + "grad_norm": 1.000662922859192, + "learning_rate": 8.022562513021443e-06, + "loss": 0.1085, + "step": 690 + }, + { + "epoch": 2.0322108345534406, + "grad_norm": 0.9595516920089722, + "learning_rate": 7.809335108089575e-06, + "loss": 0.115, + "step": 695 + }, + { + "epoch": 2.046852122986823, + "grad_norm": 1.0055134296417236, + "learning_rate": 7.597977874841444e-06, + "loss": 0.1237, + "step": 700 + }, + { + "epoch": 2.061493411420205, + "grad_norm": 1.4545137882232666, + "learning_rate": 7.388545783758727e-06, + "loss": 0.1356, + "step": 705 + }, + { + "epoch": 2.0761346998535872, + "grad_norm": 1.1168686151504517, + "learning_rate": 7.181093304625841e-06, + "loss": 0.127, + "step": 710 + }, + { + "epoch": 2.0907759882869694, + "grad_norm": 1.0162161588668823, + "learning_rate": 6.975674392363223e-06, + "loss": 0.1243, + "step": 715 + }, + { + "epoch": 2.1054172767203516, + "grad_norm": 0.8323268890380859, + "learning_rate": 6.772342472994596e-06, + "loss": 0.0972, + "step": 720 + }, + { + "epoch": 2.1200585651537334, + "grad_norm": 0.8739067912101746, + "learning_rate": 6.571150429751707e-06, + "loss": 0.0874, + "step": 725 + }, + { + "epoch": 2.1346998535871156, + "grad_norm": 0.9426400661468506, + "learning_rate": 6.372150589320339e-06, + "loss": 0.1118, + "step": 730 + }, + { + "epoch": 2.1493411420204978, + "grad_norm": 0.9613558650016785, + "learning_rate": 6.1753947082309605e-06, + "loss": 0.1153, + "step": 735 + }, + { + "epoch": 2.16398243045388, + "grad_norm": 1.2301628589630127, + "learning_rate": 5.9809339593977555e-06, + "loss": 0.1217, + "step": 740 + }, + { + "epoch": 2.178623718887262, + "grad_norm": 1.121046781539917, + "learning_rate": 5.788818918809395e-06, + "loss": 0.1146, + "step": 745 + }, + { + "epoch": 2.1932650073206443, + "grad_norm": 1.1062817573547363, + "learning_rate": 5.599099552375048e-06, + "loss": 0.1089, + "step": 750 + }, + { + "epoch": 2.2079062957540265, + "grad_norm": 0.9042452573776245, + "learning_rate": 5.411825202929121e-06, + "loss": 0.1141, + "step": 755 + }, + { + "epoch": 2.2225475841874083, + "grad_norm": 1.0303082466125488, + "learning_rate": 5.227044577397972e-06, + "loss": 0.0992, + "step": 760 + }, + { + "epoch": 2.2371888726207905, + "grad_norm": 0.7369067668914795, + "learning_rate": 5.044805734132092e-06, + "loss": 0.0989, + "step": 765 + }, + { + "epoch": 2.2518301610541727, + "grad_norm": 0.9503814578056335, + "learning_rate": 4.865156070406892e-06, + "loss": 0.09, + "step": 770 + }, + { + "epoch": 2.266471449487555, + "grad_norm": 0.749426543712616, + "learning_rate": 4.6881423100955026e-06, + "loss": 0.1138, + "step": 775 + }, + { + "epoch": 2.281112737920937, + "grad_norm": 1.1008706092834473, + "learning_rate": 4.513810491516659e-06, + "loss": 0.1088, + "step": 780 + }, + { + "epoch": 2.2957540263543192, + "grad_norm": 0.8916003108024597, + "learning_rate": 4.342205955460885e-06, + "loss": 0.103, + "step": 785 + }, + { + "epoch": 2.3103953147877014, + "grad_norm": 0.8871491551399231, + "learning_rate": 4.173373333398154e-06, + "loss": 0.1095, + "step": 790 + }, + { + "epoch": 2.3250366032210836, + "grad_norm": 1.1557129621505737, + "learning_rate": 4.007356535869946e-06, + "loss": 0.1033, + "step": 795 + }, + { + "epoch": 2.339677891654466, + "grad_norm": 0.8498390913009644, + "learning_rate": 3.844198741068887e-06, + "loss": 0.1065, + "step": 800 + }, + { + "epoch": 2.3543191800878476, + "grad_norm": 0.7775346040725708, + "learning_rate": 3.6839423836087928e-06, + "loss": 0.091, + "step": 805 + }, + { + "epoch": 2.3689604685212298, + "grad_norm": 0.7734380960464478, + "learning_rate": 3.5266291434881648e-06, + "loss": 0.0957, + "step": 810 + }, + { + "epoch": 2.383601756954612, + "grad_norm": 0.9078114032745361, + "learning_rate": 3.3722999352498868e-06, + "loss": 0.1041, + "step": 815 + }, + { + "epoch": 2.398243045387994, + "grad_norm": 1.5250548124313354, + "learning_rate": 3.220994897340067e-06, + "loss": 0.0952, + "step": 820 + }, + { + "epoch": 2.4128843338213763, + "grad_norm": 0.9233692288398743, + "learning_rate": 3.07275338166869e-06, + "loss": 0.0826, + "step": 825 + }, + { + "epoch": 2.4275256222547585, + "grad_norm": 0.9495893716812134, + "learning_rate": 2.9276139433748227e-06, + "loss": 0.0868, + "step": 830 + }, + { + "epoch": 2.4421669106881407, + "grad_norm": 0.9489635825157166, + "learning_rate": 2.785614330799101e-06, + "loss": 0.0923, + "step": 835 + }, + { + "epoch": 2.4568081991215225, + "grad_norm": 0.8223833441734314, + "learning_rate": 2.6467914756659884e-06, + "loss": 0.0955, + "step": 840 + }, + { + "epoch": 2.4714494875549047, + "grad_norm": 0.8940200209617615, + "learning_rate": 2.5111814834784767e-06, + "loss": 0.0879, + "step": 845 + }, + { + "epoch": 2.486090775988287, + "grad_norm": 0.7751861810684204, + "learning_rate": 2.3788196241276246e-06, + "loss": 0.1002, + "step": 850 + }, + { + "epoch": 2.500732064421669, + "grad_norm": 0.8367605209350586, + "learning_rate": 2.24974032271947e-06, + "loss": 0.0793, + "step": 855 + }, + { + "epoch": 2.5153733528550513, + "grad_norm": 0.7056100368499756, + "learning_rate": 2.1239771506216267e-06, + "loss": 0.0996, + "step": 860 + }, + { + "epoch": 2.5300146412884335, + "grad_norm": 0.7140523791313171, + "learning_rate": 2.0015628167319206e-06, + "loss": 0.0872, + "step": 865 + }, + { + "epoch": 2.5446559297218156, + "grad_norm": 0.8810997009277344, + "learning_rate": 1.8825291589713834e-06, + "loss": 0.0814, + "step": 870 + }, + { + "epoch": 2.559297218155198, + "grad_norm": 0.7539772987365723, + "learning_rate": 1.7669071360037043e-06, + "loss": 0.0774, + "step": 875 + }, + { + "epoch": 2.57393850658858, + "grad_norm": 0.7513999342918396, + "learning_rate": 1.6547268191834364e-06, + "loss": 0.0913, + "step": 880 + }, + { + "epoch": 2.588579795021962, + "grad_norm": 0.8109718561172485, + "learning_rate": 1.546017384734903e-06, + "loss": 0.0837, + "step": 885 + }, + { + "epoch": 2.603221083455344, + "grad_norm": 0.5877496004104614, + "learning_rate": 1.4408071061639822e-06, + "loss": 0.0928, + "step": 890 + }, + { + "epoch": 2.617862371888726, + "grad_norm": 0.7407174110412598, + "learning_rate": 1.3391233469046276e-06, + "loss": 0.0871, + "step": 895 + }, + { + "epoch": 2.6325036603221084, + "grad_norm": 0.6631835699081421, + "learning_rate": 1.240992553202085e-06, + "loss": 0.0871, + "step": 900 + }, + { + "epoch": 2.6471449487554906, + "grad_norm": 0.6617799997329712, + "learning_rate": 1.1464402472346914e-06, + "loss": 0.066, + "step": 905 + }, + { + "epoch": 2.6617862371888728, + "grad_norm": 0.7644351124763489, + "learning_rate": 1.055491020475956e-06, + "loss": 0.0928, + "step": 910 + }, + { + "epoch": 2.6764275256222545, + "grad_norm": 0.802929162979126, + "learning_rate": 9.681685272987544e-07, + "loss": 0.0786, + "step": 915 + }, + { + "epoch": 2.6910688140556367, + "grad_norm": 0.7605863213539124, + "learning_rate": 8.844954788232012e-07, + "loss": 0.0923, + "step": 920 + }, + { + "epoch": 2.705710102489019, + "grad_norm": 0.6968623399734497, + "learning_rate": 8.044936370098849e-07, + "loss": 0.0789, + "step": 925 + }, + { + "epoch": 2.720351390922401, + "grad_norm": 1.0760986804962158, + "learning_rate": 7.281838089999287e-07, + "loss": 0.0853, + "step": 930 + }, + { + "epoch": 2.7349926793557833, + "grad_norm": 0.6675553321838379, + "learning_rate": 6.555858417034183e-07, + "loss": 0.0871, + "step": 935 + }, + { + "epoch": 2.7496339677891655, + "grad_norm": 0.9073206782341003, + "learning_rate": 5.867186166375349e-07, + "loss": 0.0854, + "step": 940 + }, + { + "epoch": 2.7642752562225477, + "grad_norm": 0.6968106031417847, + "learning_rate": 5.216000450158059e-07, + "loss": 0.092, + "step": 945 + }, + { + "epoch": 2.77891654465593, + "grad_norm": 0.8066695332527161, + "learning_rate": 4.602470630896827e-07, + "loss": 0.0822, + "step": 950 + }, + { + "epoch": 2.793557833089312, + "grad_norm": 0.7586339712142944, + "learning_rate": 4.026756277437155e-07, + "loss": 0.1057, + "step": 955 + }, + { + "epoch": 2.8081991215226942, + "grad_norm": 0.7263625860214233, + "learning_rate": 3.4890071234542856e-07, + "loss": 0.0872, + "step": 960 + }, + { + "epoch": 2.822840409956076, + "grad_norm": 0.692078173160553, + "learning_rate": 2.989363028510017e-07, + "loss": 0.0757, + "step": 965 + }, + { + "epoch": 2.837481698389458, + "grad_norm": 0.5698473453521729, + "learning_rate": 2.5279539416775356e-07, + "loss": 0.0739, + "step": 970 + }, + { + "epoch": 2.8521229868228404, + "grad_norm": 0.8064164519309998, + "learning_rate": 2.1048998677438315e-07, + "loss": 0.092, + "step": 975 + }, + { + "epoch": 2.8667642752562226, + "grad_norm": 0.6845740079879761, + "learning_rate": 1.7203108359985176e-07, + "loss": 0.0771, + "step": 980 + }, + { + "epoch": 2.8814055636896048, + "grad_norm": 0.8376089930534363, + "learning_rate": 1.3742868716170032e-07, + "loss": 0.084, + "step": 985 + }, + { + "epoch": 2.896046852122987, + "grad_norm": 0.9543599486351013, + "learning_rate": 1.06691796964557e-07, + "loss": 0.0807, + "step": 990 + }, + { + "epoch": 2.9106881405563687, + "grad_norm": 0.5678905844688416, + "learning_rate": 7.982840715953288e-08, + "loss": 0.0706, + "step": 995 + }, + { + "epoch": 2.925329428989751, + "grad_norm": 0.6942958235740662, + "learning_rate": 5.68455044650551e-08, + "loss": 0.0967, + "step": 1000 + }, + { + "epoch": 2.939970717423133, + "grad_norm": 0.8430362939834595, + "learning_rate": 3.774906634975872e-08, + "loss": 0.0795, + "step": 1005 + }, + { + "epoch": 2.9546120058565153, + "grad_norm": 0.902743935585022, + "learning_rate": 2.2544059477832e-08, + "loss": 0.0947, + "step": 1010 + }, + { + "epoch": 2.9692532942898975, + "grad_norm": 0.7746142745018005, + "learning_rate": 1.1234438417279668e-08, + "loss": 0.0794, + "step": 1015 + }, + { + "epoch": 2.9838945827232797, + "grad_norm": 0.8394314050674438, + "learning_rate": 3.8231446113989876e-09, + "loss": 0.1001, + "step": 1020 + }, + { + "epoch": 2.998535871156662, + "grad_norm": 0.6579728722572327, + "learning_rate": 3.1210561376504574e-10, + "loss": 0.0844, + "step": 1025 + }, + { + "epoch": 3.0, + "step": 1026, + "total_flos": 1.2487673941600502e+18, + "train_loss": 0.41707510350827587, + "train_runtime": 1012.4706, + "train_samples_per_second": 32.374, + "train_steps_per_second": 1.013 + } + ], + "logging_steps": 5, + "max_steps": 1026, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2487673941600502e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b953de99315f67bbeac473db8b93398fddb6ce1 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3323ac7c82e2b526492fa548ecf5b449bb3e45ebae76bf89ece917a017b9dcda +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1612732388e3238b3acc0cdf834f5f320a5b4378 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 10_128_e3_3e-5 + results: [] +--- + + + +# 10_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..13d7bb616b9bf3f611ea2d01136c1de732fa2a4f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "gate_proj", + "q_proj", + "o_proj", + "k_proj", + "down_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfead64765bda11815b2c063434a81c0ca184d92 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b45ce4ff00ac72a9ade37def5ea397b13cba7e37c81ab13fdc888bc73c276f +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1e140a356c3f87a1a8119e80f19dfaade37a7483 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3653474920061993e+18, + "train_loss": 0.42329370426066093, + "train_runtime": 1085.1535, + "train_samples": 11451, + "train_samples_per_second": 31.657, + "train_steps_per_second": 0.99 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1e140a356c3f87a1a8119e80f19dfaade37a7483 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3653474920061993e+18, + "train_loss": 0.42329370426066093, + "train_runtime": 1085.1535, + "train_samples": 11451, + "train_samples_per_second": 31.657, + "train_steps_per_second": 0.99 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e2128e6e8dd4c8f2fcde5432bb13493402373d3f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1541 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1074, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013966480446927373, + "grad_norm": 1.1051961183547974, + "learning_rate": 2.222222222222222e-06, + "loss": 1.5373, + "step": 5 + }, + { + "epoch": 0.027932960893854747, + "grad_norm": 0.671780526638031, + "learning_rate": 4.9999999999999996e-06, + "loss": 1.5649, + "step": 10 + }, + { + "epoch": 0.04189944134078212, + "grad_norm": 0.5117776393890381, + "learning_rate": 7.777777777777777e-06, + "loss": 1.4991, + "step": 15 + }, + { + "epoch": 0.055865921787709494, + "grad_norm": 0.5335372686386108, + "learning_rate": 1.0555555555555555e-05, + "loss": 1.4801, + "step": 20 + }, + { + "epoch": 0.06983240223463687, + "grad_norm": 0.47958508133888245, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.5307, + "step": 25 + }, + { + "epoch": 0.08379888268156424, + "grad_norm": 0.5334808826446533, + "learning_rate": 1.6111111111111115e-05, + "loss": 1.4935, + "step": 30 + }, + { + "epoch": 0.09776536312849161, + "grad_norm": 0.5791395902633667, + "learning_rate": 1.888888888888889e-05, + "loss": 1.4728, + "step": 35 + }, + { + "epoch": 0.11173184357541899, + "grad_norm": 0.5127893090248108, + "learning_rate": 2.1666666666666667e-05, + "loss": 1.3673, + "step": 40 + }, + { + "epoch": 0.12569832402234637, + "grad_norm": 0.4711889624595642, + "learning_rate": 2.4444444444444445e-05, + "loss": 1.4604, + "step": 45 + }, + { + "epoch": 0.13966480446927373, + "grad_norm": 0.5687071084976196, + "learning_rate": 2.7222222222222223e-05, + "loss": 1.3703, + "step": 50 + }, + { + "epoch": 0.15363128491620112, + "grad_norm": 0.5968135595321655, + "learning_rate": 3e-05, + "loss": 1.3541, + "step": 55 + }, + { + "epoch": 0.16759776536312848, + "grad_norm": 0.5591203570365906, + "learning_rate": 2.9998221343471153e-05, + "loss": 1.3242, + "step": 60 + }, + { + "epoch": 0.18156424581005587, + "grad_norm": 0.6399550437927246, + "learning_rate": 2.9992885795700492e-05, + "loss": 1.2645, + "step": 65 + }, + { + "epoch": 0.19553072625698323, + "grad_norm": 0.6517438292503357, + "learning_rate": 2.998399462203559e-05, + "loss": 1.2928, + "step": 70 + }, + { + "epoch": 0.20949720670391062, + "grad_norm": 0.6443610787391663, + "learning_rate": 2.9971549931055665e-05, + "loss": 1.2667, + "step": 75 + }, + { + "epoch": 0.22346368715083798, + "grad_norm": 0.6960833072662354, + "learning_rate": 2.9955554674071492e-05, + "loss": 1.2621, + "step": 80 + }, + { + "epoch": 0.23743016759776536, + "grad_norm": 0.7616862654685974, + "learning_rate": 2.9936012644425518e-05, + "loss": 1.2063, + "step": 85 + }, + { + "epoch": 0.25139664804469275, + "grad_norm": 0.6843656897544861, + "learning_rate": 2.991292847659222e-05, + "loss": 1.1652, + "step": 90 + }, + { + "epoch": 0.26536312849162014, + "grad_norm": 0.993890643119812, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.1669, + "step": 95 + }, + { + "epoch": 0.27932960893854747, + "grad_norm": 1.092720627784729, + "learning_rate": 2.9856156463128078e-05, + "loss": 1.1247, + "step": 100 + }, + { + "epoch": 0.29329608938547486, + "grad_norm": 0.8143765926361084, + "learning_rate": 2.982248208121889e-05, + "loss": 1.1169, + "step": 105 + }, + { + "epoch": 0.30726256983240224, + "grad_norm": 0.963901698589325, + "learning_rate": 2.9785292485372714e-05, + "loss": 1.156, + "step": 110 + }, + { + "epoch": 0.32122905027932963, + "grad_norm": 0.7332763671875, + "learning_rate": 2.974459649525853e-05, + "loss": 1.0636, + "step": 115 + }, + { + "epoch": 0.33519553072625696, + "grad_norm": 0.8999432325363159, + "learning_rate": 2.970040376210148e-05, + "loss": 1.0194, + "step": 120 + }, + { + "epoch": 0.34916201117318435, + "grad_norm": 0.8240070343017578, + "learning_rate": 2.9652724766394012e-05, + "loss": 1.0538, + "step": 125 + }, + { + "epoch": 0.36312849162011174, + "grad_norm": 0.8722200989723206, + "learning_rate": 2.9601570815410394e-05, + "loss": 1.0837, + "step": 130 + }, + { + "epoch": 0.3770949720670391, + "grad_norm": 0.8946022391319275, + "learning_rate": 2.9546954040525144e-05, + "loss": 1.0138, + "step": 135 + }, + { + "epoch": 0.39106145251396646, + "grad_norm": 1.0152587890625, + "learning_rate": 2.9488887394336025e-05, + "loss": 0.9292, + "step": 140 + }, + { + "epoch": 0.40502793296089384, + "grad_norm": 0.8060712218284607, + "learning_rate": 2.942738464759229e-05, + "loss": 0.9941, + "step": 145 + }, + { + "epoch": 0.41899441340782123, + "grad_norm": 0.8640080094337463, + "learning_rate": 2.9362460385928864e-05, + "loss": 0.8851, + "step": 150 + }, + { + "epoch": 0.4329608938547486, + "grad_norm": 1.029614806175232, + "learning_rate": 2.929413000640735e-05, + "loss": 0.9275, + "step": 155 + }, + { + "epoch": 0.44692737430167595, + "grad_norm": 1.0977739095687866, + "learning_rate": 2.922240971386449e-05, + "loss": 0.9332, + "step": 160 + }, + { + "epoch": 0.46089385474860334, + "grad_norm": 0.9147678017616272, + "learning_rate": 2.9147316517069164e-05, + "loss": 0.866, + "step": 165 + }, + { + "epoch": 0.4748603351955307, + "grad_norm": 1.137666940689087, + "learning_rate": 2.9068868224688674e-05, + "loss": 0.8503, + "step": 170 + }, + { + "epoch": 0.4888268156424581, + "grad_norm": 0.9705405831336975, + "learning_rate": 2.8987083441065335e-05, + "loss": 0.8527, + "step": 175 + }, + { + "epoch": 0.5027932960893855, + "grad_norm": 0.9750431776046753, + "learning_rate": 2.8901981561804408e-05, + "loss": 0.8228, + "step": 180 + }, + { + "epoch": 0.5167597765363129, + "grad_norm": 1.0630513429641724, + "learning_rate": 2.8813582769174304e-05, + "loss": 0.8433, + "step": 185 + }, + { + "epoch": 0.5307262569832403, + "grad_norm": 1.1255079507827759, + "learning_rate": 2.8721908027320315e-05, + "loss": 0.7894, + "step": 190 + }, + { + "epoch": 0.5446927374301676, + "grad_norm": 1.0003700256347656, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.7694, + "step": 195 + }, + { + "epoch": 0.5586592178770949, + "grad_norm": 1.0122191905975342, + "learning_rate": 2.8528818431891495e-05, + "loss": 0.7198, + "step": 200 + }, + { + "epoch": 0.5726256983240223, + "grad_norm": 1.0526182651519775, + "learning_rate": 2.8427449370325938e-05, + "loss": 0.7384, + "step": 205 + }, + { + "epoch": 0.5865921787709497, + "grad_norm": 1.2404016256332397, + "learning_rate": 2.8322895932695272e-05, + "loss": 0.7108, + "step": 210 + }, + { + "epoch": 0.6005586592178771, + "grad_norm": 1.0305923223495483, + "learning_rate": 2.8215182914286768e-05, + "loss": 0.7174, + "step": 215 + }, + { + "epoch": 0.6145251396648045, + "grad_norm": 1.1633076667785645, + "learning_rate": 2.8104335859695545e-05, + "loss": 0.7012, + "step": 220 + }, + { + "epoch": 0.6284916201117319, + "grad_norm": 1.175048589706421, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.7324, + "step": 225 + }, + { + "epoch": 0.6424581005586593, + "grad_norm": 1.193458080291748, + "learning_rate": 2.787334553036044e-05, + "loss": 0.6352, + "step": 230 + }, + { + "epoch": 0.6564245810055865, + "grad_norm": 1.2989252805709839, + "learning_rate": 2.7753257035944216e-05, + "loss": 0.6811, + "step": 235 + }, + { + "epoch": 0.6703910614525139, + "grad_norm": 1.1719774007797241, + "learning_rate": 2.763014405300918e-05, + "loss": 0.6593, + "step": 240 + }, + { + "epoch": 0.6843575418994413, + "grad_norm": 1.0746477842330933, + "learning_rate": 2.750403577831679e-05, + "loss": 0.6725, + "step": 245 + }, + { + "epoch": 0.6983240223463687, + "grad_norm": 1.2245765924453735, + "learning_rate": 2.7374962118974533e-05, + "loss": 0.6327, + "step": 250 + }, + { + "epoch": 0.7122905027932961, + "grad_norm": 1.1319698095321655, + "learning_rate": 2.7242953685343327e-05, + "loss": 0.6433, + "step": 255 + }, + { + "epoch": 0.7262569832402235, + "grad_norm": 1.1353050470352173, + "learning_rate": 2.7108041783778144e-05, + "loss": 0.7012, + "step": 260 + }, + { + "epoch": 0.7402234636871509, + "grad_norm": 1.0978429317474365, + "learning_rate": 2.6970258409203596e-05, + "loss": 0.6176, + "step": 265 + }, + { + "epoch": 0.7541899441340782, + "grad_norm": 1.239736795425415, + "learning_rate": 2.6829636237526175e-05, + "loss": 0.652, + "step": 270 + }, + { + "epoch": 0.7681564245810056, + "grad_norm": 1.2181453704833984, + "learning_rate": 2.6686208617885057e-05, + "loss": 0.62, + "step": 275 + }, + { + "epoch": 0.7821229050279329, + "grad_norm": 1.2852040529251099, + "learning_rate": 2.6540009564743186e-05, + "loss": 0.6347, + "step": 280 + }, + { + "epoch": 0.7960893854748603, + "grad_norm": 1.1914550065994263, + "learning_rate": 2.639107374982061e-05, + "loss": 0.5774, + "step": 285 + }, + { + "epoch": 0.8100558659217877, + "grad_norm": 1.091722846031189, + "learning_rate": 2.623943649387194e-05, + "loss": 0.5581, + "step": 290 + }, + { + "epoch": 0.8240223463687151, + "grad_norm": 1.1445775032043457, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.5565, + "step": 295 + }, + { + "epoch": 0.8379888268156425, + "grad_norm": 1.3087140321731567, + "learning_rate": 2.5928202136676856e-05, + "loss": 0.5876, + "step": 300 + }, + { + "epoch": 0.8519553072625698, + "grad_norm": 1.2477210760116577, + "learning_rate": 2.576867884596663e-05, + "loss": 0.5562, + "step": 305 + }, + { + "epoch": 0.8659217877094972, + "grad_norm": 1.5361151695251465, + "learning_rate": 2.5606601717798212e-05, + "loss": 0.5213, + "step": 310 + }, + { + "epoch": 0.8798882681564246, + "grad_norm": 1.3265758752822876, + "learning_rate": 2.5442009189443902e-05, + "loss": 0.4935, + "step": 315 + }, + { + "epoch": 0.8938547486033519, + "grad_norm": 1.2183924913406372, + "learning_rate": 2.527494029471371e-05, + "loss": 0.4933, + "step": 320 + }, + { + "epoch": 0.9078212290502793, + "grad_norm": 1.2504791021347046, + "learning_rate": 2.510543465469836e-05, + "loss": 0.5332, + "step": 325 + }, + { + "epoch": 0.9217877094972067, + "grad_norm": 1.3123568296432495, + "learning_rate": 2.4933532468372955e-05, + "loss": 0.4978, + "step": 330 + }, + { + "epoch": 0.9357541899441341, + "grad_norm": 1.2381938695907593, + "learning_rate": 2.4759274503063632e-05, + "loss": 0.5042, + "step": 335 + }, + { + "epoch": 0.9497206703910615, + "grad_norm": 1.1595544815063477, + "learning_rate": 2.458270208477942e-05, + "loss": 0.4889, + "step": 340 + }, + { + "epoch": 0.9636871508379888, + "grad_norm": 1.1798391342163086, + "learning_rate": 2.44038570884116e-05, + "loss": 0.4853, + "step": 345 + }, + { + "epoch": 0.9776536312849162, + "grad_norm": 1.3338872194290161, + "learning_rate": 2.422278192780289e-05, + "loss": 0.5055, + "step": 350 + }, + { + "epoch": 0.9916201117318436, + "grad_norm": 1.2519588470458984, + "learning_rate": 2.4039519545688848e-05, + "loss": 0.5015, + "step": 355 + }, + { + "epoch": 1.005586592178771, + "grad_norm": 1.2833123207092285, + "learning_rate": 2.3854113403513795e-05, + "loss": 0.4247, + "step": 360 + }, + { + "epoch": 1.0195530726256983, + "grad_norm": 1.3393051624298096, + "learning_rate": 2.3666607471123768e-05, + "loss": 0.4825, + "step": 365 + }, + { + "epoch": 1.0335195530726258, + "grad_norm": 1.3647127151489258, + "learning_rate": 2.3477046216338877e-05, + "loss": 0.3771, + "step": 370 + }, + { + "epoch": 1.047486033519553, + "grad_norm": 1.310333013534546, + "learning_rate": 2.3285474594407588e-05, + "loss": 0.4266, + "step": 375 + }, + { + "epoch": 1.0614525139664805, + "grad_norm": 1.1945815086364746, + "learning_rate": 2.3091938037345373e-05, + "loss": 0.3838, + "step": 380 + }, + { + "epoch": 1.0754189944134078, + "grad_norm": 1.294047236442566, + "learning_rate": 2.2896482443160337e-05, + "loss": 0.3687, + "step": 385 + }, + { + "epoch": 1.089385474860335, + "grad_norm": 1.341181993484497, + "learning_rate": 2.2699154164968308e-05, + "loss": 0.3849, + "step": 390 + }, + { + "epoch": 1.1033519553072626, + "grad_norm": 1.1444048881530762, + "learning_rate": 2.25e-05, + "loss": 0.413, + "step": 395 + }, + { + "epoch": 1.1173184357541899, + "grad_norm": 1.2892634868621826, + "learning_rate": 2.2299067178502843e-05, + "loss": 0.3649, + "step": 400 + }, + { + "epoch": 1.1312849162011174, + "grad_norm": 1.1524863243103027, + "learning_rate": 2.2096403352540153e-05, + "loss": 0.3899, + "step": 405 + }, + { + "epoch": 1.1452513966480447, + "grad_norm": 1.2742811441421509, + "learning_rate": 2.1892056584690214e-05, + "loss": 0.3733, + "step": 410 + }, + { + "epoch": 1.1592178770949721, + "grad_norm": 1.444509744644165, + "learning_rate": 2.1686075336648075e-05, + "loss": 0.3424, + "step": 415 + }, + { + "epoch": 1.1731843575418994, + "grad_norm": 1.4962750673294067, + "learning_rate": 2.1478508457732617e-05, + "loss": 0.3222, + "step": 420 + }, + { + "epoch": 1.1871508379888267, + "grad_norm": 1.1962943077087402, + "learning_rate": 2.1269405173301752e-05, + "loss": 0.3324, + "step": 425 + }, + { + "epoch": 1.2011173184357542, + "grad_norm": 1.4302680492401123, + "learning_rate": 2.1058815073078425e-05, + "loss": 0.3387, + "step": 430 + }, + { + "epoch": 1.2150837988826815, + "grad_norm": 1.2922435998916626, + "learning_rate": 2.084678809939019e-05, + "loss": 0.3047, + "step": 435 + }, + { + "epoch": 1.229050279329609, + "grad_norm": 1.2379450798034668, + "learning_rate": 2.063337453532519e-05, + "loss": 0.355, + "step": 440 + }, + { + "epoch": 1.2430167597765363, + "grad_norm": 1.3427166938781738, + "learning_rate": 2.0418624992807297e-05, + "loss": 0.3328, + "step": 445 + }, + { + "epoch": 1.2569832402234637, + "grad_norm": 1.3227717876434326, + "learning_rate": 2.0202590400593287e-05, + "loss": 0.3437, + "step": 450 + }, + { + "epoch": 1.270949720670391, + "grad_norm": 1.2782284021377563, + "learning_rate": 1.9985321992194896e-05, + "loss": 0.3105, + "step": 455 + }, + { + "epoch": 1.2849162011173183, + "grad_norm": 1.2212433815002441, + "learning_rate": 1.9766871293728525e-05, + "loss": 0.3098, + "step": 460 + }, + { + "epoch": 1.2988826815642458, + "grad_norm": 1.3072829246520996, + "learning_rate": 1.9547290111695654e-05, + "loss": 0.3233, + "step": 465 + }, + { + "epoch": 1.3128491620111733, + "grad_norm": 1.4570711851119995, + "learning_rate": 1.9326630520696683e-05, + "loss": 0.3319, + "step": 470 + }, + { + "epoch": 1.3268156424581006, + "grad_norm": 1.1446694135665894, + "learning_rate": 1.9104944851081247e-05, + "loss": 0.2602, + "step": 475 + }, + { + "epoch": 1.3407821229050279, + "grad_norm": 1.4602057933807373, + "learning_rate": 1.888228567653781e-05, + "loss": 0.315, + "step": 480 + }, + { + "epoch": 1.3547486033519553, + "grad_norm": 1.3967688083648682, + "learning_rate": 1.8658705801625657e-05, + "loss": 0.3222, + "step": 485 + }, + { + "epoch": 1.3687150837988826, + "grad_norm": 1.2128736972808838, + "learning_rate": 1.843425824925201e-05, + "loss": 0.315, + "step": 490 + }, + { + "epoch": 1.3826815642458101, + "grad_norm": 1.2534290552139282, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.2667, + "step": 495 + }, + { + "epoch": 1.3966480446927374, + "grad_norm": 1.2720451354980469, + "learning_rate": 1.798297321999255e-05, + "loss": 0.2522, + "step": 500 + }, + { + "epoch": 1.410614525139665, + "grad_norm": 1.2320210933685303, + "learning_rate": 1.775624276724856e-05, + "loss": 0.2575, + "step": 505 + }, + { + "epoch": 1.4245810055865922, + "grad_norm": 1.1688010692596436, + "learning_rate": 1.752885865994549e-05, + "loss": 0.2554, + "step": 510 + }, + { + "epoch": 1.4385474860335195, + "grad_norm": 1.2728002071380615, + "learning_rate": 1.7300874823180284e-05, + "loss": 0.2504, + "step": 515 + }, + { + "epoch": 1.452513966480447, + "grad_norm": 1.2793279886245728, + "learning_rate": 1.7072345324278235e-05, + "loss": 0.2653, + "step": 520 + }, + { + "epoch": 1.4664804469273742, + "grad_norm": 1.3310036659240723, + "learning_rate": 1.6843324359970714e-05, + "loss": 0.2419, + "step": 525 + }, + { + "epoch": 1.4804469273743017, + "grad_norm": 1.4237326383590698, + "learning_rate": 1.6613866243542173e-05, + "loss": 0.2731, + "step": 530 + }, + { + "epoch": 1.494413407821229, + "grad_norm": 1.1445667743682861, + "learning_rate": 1.638402539194953e-05, + "loss": 0.208, + "step": 535 + }, + { + "epoch": 1.5083798882681565, + "grad_norm": 1.1761115789413452, + "learning_rate": 1.6153856312916957e-05, + "loss": 0.2238, + "step": 540 + }, + { + "epoch": 1.5223463687150838, + "grad_norm": 1.2696056365966797, + "learning_rate": 1.5923413592009145e-05, + "loss": 0.2418, + "step": 545 + }, + { + "epoch": 1.536312849162011, + "grad_norm": 1.1580119132995605, + "learning_rate": 1.5692751879686097e-05, + "loss": 0.2154, + "step": 550 + }, + { + "epoch": 1.5502793296089385, + "grad_norm": 1.2473526000976562, + "learning_rate": 1.5461925878342558e-05, + "loss": 0.2425, + "step": 555 + }, + { + "epoch": 1.564245810055866, + "grad_norm": 1.2904664278030396, + "learning_rate": 1.5230990329335104e-05, + "loss": 0.2366, + "step": 560 + }, + { + "epoch": 1.5782122905027933, + "grad_norm": 1.1771873235702515, + "learning_rate": 1.5e-05, + "loss": 0.2132, + "step": 565 + }, + { + "epoch": 1.5921787709497206, + "grad_norm": 1.1184929609298706, + "learning_rate": 1.4769009670664899e-05, + "loss": 0.1869, + "step": 570 + }, + { + "epoch": 1.606145251396648, + "grad_norm": 1.3634597063064575, + "learning_rate": 1.4538074121657448e-05, + "loss": 0.21, + "step": 575 + }, + { + "epoch": 1.6201117318435754, + "grad_norm": 1.2137888669967651, + "learning_rate": 1.430724812031391e-05, + "loss": 0.2016, + "step": 580 + }, + { + "epoch": 1.6340782122905027, + "grad_norm": 1.1751962900161743, + "learning_rate": 1.4076586407990858e-05, + "loss": 0.2209, + "step": 585 + }, + { + "epoch": 1.6480446927374302, + "grad_norm": 1.446349859237671, + "learning_rate": 1.3846143687083044e-05, + "loss": 0.2268, + "step": 590 + }, + { + "epoch": 1.6620111731843576, + "grad_norm": 1.258285641670227, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.2491, + "step": 595 + }, + { + "epoch": 1.675977653631285, + "grad_norm": 1.2441043853759766, + "learning_rate": 1.3386133756457831e-05, + "loss": 0.2352, + "step": 600 + }, + { + "epoch": 1.6899441340782122, + "grad_norm": 1.1855571269989014, + "learning_rate": 1.3156675640029292e-05, + "loss": 0.2359, + "step": 605 + }, + { + "epoch": 1.7039106145251397, + "grad_norm": 1.2200303077697754, + "learning_rate": 1.2927654675721771e-05, + "loss": 0.237, + "step": 610 + }, + { + "epoch": 1.7178770949720672, + "grad_norm": 1.1969456672668457, + "learning_rate": 1.2699125176819717e-05, + "loss": 0.1997, + "step": 615 + }, + { + "epoch": 1.7318435754189943, + "grad_norm": 1.2466518878936768, + "learning_rate": 1.2471141340054511e-05, + "loss": 0.237, + "step": 620 + }, + { + "epoch": 1.7458100558659218, + "grad_norm": 1.1464226245880127, + "learning_rate": 1.2243757232751444e-05, + "loss": 0.1858, + "step": 625 + }, + { + "epoch": 1.7597765363128492, + "grad_norm": 1.0559086799621582, + "learning_rate": 1.2017026780007454e-05, + "loss": 0.2051, + "step": 630 + }, + { + "epoch": 1.7737430167597765, + "grad_norm": 1.0705472230911255, + "learning_rate": 1.1791003751902542e-05, + "loss": 0.1844, + "step": 635 + }, + { + "epoch": 1.7877094972067038, + "grad_norm": 1.1836702823638916, + "learning_rate": 1.1565741750747992e-05, + "loss": 0.2058, + "step": 640 + }, + { + "epoch": 1.8016759776536313, + "grad_norm": 1.293139934539795, + "learning_rate": 1.1341294198374342e-05, + "loss": 0.1957, + "step": 645 + }, + { + "epoch": 1.8156424581005588, + "grad_norm": 1.081498384475708, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1909, + "step": 650 + }, + { + "epoch": 1.829608938547486, + "grad_norm": 1.273537516593933, + "learning_rate": 1.0895055148918758e-05, + "loss": 0.2049, + "step": 655 + }, + { + "epoch": 1.8435754189944134, + "grad_norm": 1.206319808959961, + "learning_rate": 1.0673369479303316e-05, + "loss": 0.1956, + "step": 660 + }, + { + "epoch": 1.8575418994413408, + "grad_norm": 1.1265201568603516, + "learning_rate": 1.0452709888304348e-05, + "loss": 0.1905, + "step": 665 + }, + { + "epoch": 1.8715083798882681, + "grad_norm": 1.152542233467102, + "learning_rate": 1.0233128706271476e-05, + "loss": 0.1861, + "step": 670 + }, + { + "epoch": 1.8854748603351954, + "grad_norm": 1.3747248649597168, + "learning_rate": 1.0014678007805108e-05, + "loss": 0.1765, + "step": 675 + }, + { + "epoch": 1.899441340782123, + "grad_norm": 1.0829384326934814, + "learning_rate": 9.797409599406709e-06, + "loss": 0.1511, + "step": 680 + }, + { + "epoch": 1.9134078212290504, + "grad_norm": 0.9942070841789246, + "learning_rate": 9.581375007192707e-06, + "loss": 0.1776, + "step": 685 + }, + { + "epoch": 1.9273743016759777, + "grad_norm": 1.4194974899291992, + "learning_rate": 9.366625464674812e-06, + "loss": 0.1835, + "step": 690 + }, + { + "epoch": 1.941340782122905, + "grad_norm": 1.1124590635299683, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1436, + "step": 695 + }, + { + "epoch": 1.9553072625698324, + "grad_norm": 1.191678762435913, + "learning_rate": 8.941184926921578e-06, + "loss": 0.1675, + "step": 700 + }, + { + "epoch": 1.9692737430167597, + "grad_norm": 1.0613195896148682, + "learning_rate": 8.730594826698255e-06, + "loss": 0.1475, + "step": 705 + }, + { + "epoch": 1.983240223463687, + "grad_norm": 1.073777198791504, + "learning_rate": 8.521491542267386e-06, + "loss": 0.1478, + "step": 710 + }, + { + "epoch": 1.9972067039106145, + "grad_norm": 1.2126998901367188, + "learning_rate": 8.313924663351927e-06, + "loss": 0.1577, + "step": 715 + }, + { + "epoch": 2.011173184357542, + "grad_norm": 0.9941936731338501, + "learning_rate": 8.107943415309787e-06, + "loss": 0.1476, + "step": 720 + }, + { + "epoch": 2.0251396648044695, + "grad_norm": 1.7277369499206543, + "learning_rate": 7.903596647459851e-06, + "loss": 0.1185, + "step": 725 + }, + { + "epoch": 2.0391061452513966, + "grad_norm": 1.1211957931518555, + "learning_rate": 7.700932821497157e-06, + "loss": 0.1256, + "step": 730 + }, + { + "epoch": 2.053072625698324, + "grad_norm": 0.9109772443771362, + "learning_rate": 7.500000000000004e-06, + "loss": 0.1417, + "step": 735 + }, + { + "epoch": 2.0670391061452515, + "grad_norm": 0.8943061232566833, + "learning_rate": 7.300845835031694e-06, + "loss": 0.1281, + "step": 740 + }, + { + "epoch": 2.0810055865921786, + "grad_norm": 0.9815489053726196, + "learning_rate": 7.103517556839661e-06, + "loss": 0.1261, + "step": 745 + }, + { + "epoch": 2.094972067039106, + "grad_norm": 1.0027356147766113, + "learning_rate": 6.9080619626546276e-06, + "loss": 0.1136, + "step": 750 + }, + { + "epoch": 2.1089385474860336, + "grad_norm": 1.0089319944381714, + "learning_rate": 6.7145254055924136e-06, + "loss": 0.1187, + "step": 755 + }, + { + "epoch": 2.122905027932961, + "grad_norm": 1.0605181455612183, + "learning_rate": 6.522953783661122e-06, + "loss": 0.1193, + "step": 760 + }, + { + "epoch": 2.136871508379888, + "grad_norm": 1.1434526443481445, + "learning_rate": 6.333392528876234e-06, + "loss": 0.1212, + "step": 765 + }, + { + "epoch": 2.1508379888268156, + "grad_norm": 1.045058012008667, + "learning_rate": 6.145886596486208e-06, + "loss": 0.1342, + "step": 770 + }, + { + "epoch": 2.164804469273743, + "grad_norm": 0.8226084113121033, + "learning_rate": 5.960480454311155e-06, + "loss": 0.109, + "step": 775 + }, + { + "epoch": 2.17877094972067, + "grad_norm": 1.098077654838562, + "learning_rate": 5.777218072197114e-06, + "loss": 0.1134, + "step": 780 + }, + { + "epoch": 2.1927374301675977, + "grad_norm": 0.9562540650367737, + "learning_rate": 5.596142911588407e-06, + "loss": 0.0991, + "step": 785 + }, + { + "epoch": 2.206703910614525, + "grad_norm": 0.9545491337776184, + "learning_rate": 5.417297915220584e-06, + "loss": 0.1178, + "step": 790 + }, + { + "epoch": 2.2206703910614527, + "grad_norm": 0.9332047700881958, + "learning_rate": 5.240725496936373e-06, + "loss": 0.1096, + "step": 795 + }, + { + "epoch": 2.2346368715083798, + "grad_norm": 1.1135733127593994, + "learning_rate": 5.0664675316270515e-06, + "loss": 0.1212, + "step": 800 + }, + { + "epoch": 2.2486033519553073, + "grad_norm": 0.8558012843132019, + "learning_rate": 4.894565345301642e-06, + "loss": 0.0979, + "step": 805 + }, + { + "epoch": 2.2625698324022347, + "grad_norm": 0.985834538936615, + "learning_rate": 4.725059705286291e-06, + "loss": 0.0985, + "step": 810 + }, + { + "epoch": 2.276536312849162, + "grad_norm": 0.9061849117279053, + "learning_rate": 4.557990810556102e-06, + "loss": 0.1028, + "step": 815 + }, + { + "epoch": 2.2905027932960893, + "grad_norm": 0.8396353125572205, + "learning_rate": 4.393398282201788e-06, + "loss": 0.1157, + "step": 820 + }, + { + "epoch": 2.304469273743017, + "grad_norm": 0.8069060444831848, + "learning_rate": 4.231321154033372e-06, + "loss": 0.0955, + "step": 825 + }, + { + "epoch": 2.3184357541899443, + "grad_norm": 1.0234320163726807, + "learning_rate": 4.071797863323148e-06, + "loss": 0.1232, + "step": 830 + }, + { + "epoch": 2.3324022346368714, + "grad_norm": 0.9663488864898682, + "learning_rate": 3.914866241690115e-06, + "loss": 0.1111, + "step": 835 + }, + { + "epoch": 2.346368715083799, + "grad_norm": 0.8838871717453003, + "learning_rate": 3.7605635061280604e-06, + "loss": 0.0927, + "step": 840 + }, + { + "epoch": 2.3603351955307263, + "grad_norm": 0.9890097975730896, + "learning_rate": 3.608926250179392e-06, + "loss": 0.1072, + "step": 845 + }, + { + "epoch": 2.3743016759776534, + "grad_norm": 0.8915905356407166, + "learning_rate": 3.459990435256816e-06, + "loss": 0.1099, + "step": 850 + }, + { + "epoch": 2.388268156424581, + "grad_norm": 0.8258458971977234, + "learning_rate": 3.313791382114943e-06, + "loss": 0.1013, + "step": 855 + }, + { + "epoch": 2.4022346368715084, + "grad_norm": 0.8453196883201599, + "learning_rate": 3.1703637624738254e-06, + "loss": 0.0965, + "step": 860 + }, + { + "epoch": 2.416201117318436, + "grad_norm": 0.9213568568229675, + "learning_rate": 3.0297415907964078e-06, + "loss": 0.1161, + "step": 865 + }, + { + "epoch": 2.430167597765363, + "grad_norm": 0.9118015170097351, + "learning_rate": 2.891958216221857e-06, + "loss": 0.0898, + "step": 870 + }, + { + "epoch": 2.4441340782122905, + "grad_norm": 0.807468831539154, + "learning_rate": 2.757046314656676e-06, + "loss": 0.1069, + "step": 875 + }, + { + "epoch": 2.458100558659218, + "grad_norm": 0.8332687020301819, + "learning_rate": 2.6250378810254673e-06, + "loss": 0.0979, + "step": 880 + }, + { + "epoch": 2.472067039106145, + "grad_norm": 1.0207360982894897, + "learning_rate": 2.495964221683209e-06, + "loss": 0.1066, + "step": 885 + }, + { + "epoch": 2.4860335195530725, + "grad_norm": 0.9490545988082886, + "learning_rate": 2.3698559469908228e-06, + "loss": 0.0969, + "step": 890 + }, + { + "epoch": 2.5, + "grad_norm": 0.7144438028335571, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.1022, + "step": 895 + }, + { + "epoch": 2.5139664804469275, + "grad_norm": 1.7635596990585327, + "learning_rate": 2.1266544696395584e-06, + "loss": 0.1068, + "step": 900 + }, + { + "epoch": 2.527932960893855, + "grad_norm": 0.9171124696731567, + "learning_rate": 2.0096189432334194e-06, + "loss": 0.0967, + "step": 905 + }, + { + "epoch": 2.541899441340782, + "grad_norm": 0.9109405875205994, + "learning_rate": 1.8956641403044578e-06, + "loss": 0.0911, + "step": 910 + }, + { + "epoch": 2.5558659217877095, + "grad_norm": 0.7553408741950989, + "learning_rate": 1.784817085713233e-06, + "loss": 0.1108, + "step": 915 + }, + { + "epoch": 2.5698324022346366, + "grad_norm": 0.7201420068740845, + "learning_rate": 1.6771040673047271e-06, + "loss": 0.0978, + "step": 920 + }, + { + "epoch": 2.583798882681564, + "grad_norm": 1.1265109777450562, + "learning_rate": 1.5725506296740666e-06, + "loss": 0.1149, + "step": 925 + }, + { + "epoch": 2.5977653631284916, + "grad_norm": 0.9031225442886353, + "learning_rate": 1.4711815681085094e-06, + "loss": 0.0967, + "step": 930 + }, + { + "epoch": 2.611731843575419, + "grad_norm": 0.8457847237586975, + "learning_rate": 1.3730209227071439e-06, + "loss": 0.1157, + "step": 935 + }, + { + "epoch": 2.6256983240223466, + "grad_norm": 1.0126798152923584, + "learning_rate": 1.2780919726796846e-06, + "loss": 0.109, + "step": 940 + }, + { + "epoch": 2.6396648044692737, + "grad_norm": 0.7266894578933716, + "learning_rate": 1.186417230825695e-06, + "loss": 0.1021, + "step": 945 + }, + { + "epoch": 2.653631284916201, + "grad_norm": 0.7084643244743347, + "learning_rate": 1.0980184381955944e-06, + "loss": 0.0838, + "step": 950 + }, + { + "epoch": 2.6675977653631286, + "grad_norm": 0.7572463154792786, + "learning_rate": 1.0129165589346644e-06, + "loss": 0.0842, + "step": 955 + }, + { + "epoch": 2.6815642458100557, + "grad_norm": 0.7241321206092834, + "learning_rate": 9.311317753113319e-07, + "loss": 0.0965, + "step": 960 + }, + { + "epoch": 2.695530726256983, + "grad_norm": 0.7626327872276306, + "learning_rate": 8.526834829308383e-07, + "loss": 0.0885, + "step": 965 + }, + { + "epoch": 2.7094972067039107, + "grad_norm": 0.7498055696487427, + "learning_rate": 7.775902861355122e-07, + "loss": 0.0816, + "step": 970 + }, + { + "epoch": 2.723463687150838, + "grad_norm": 0.7917249798774719, + "learning_rate": 7.058699935926527e-07, + "loss": 0.0923, + "step": 975 + }, + { + "epoch": 2.7374301675977653, + "grad_norm": 0.7060430645942688, + "learning_rate": 6.375396140711348e-07, + "loss": 0.0941, + "step": 980 + }, + { + "epoch": 2.7513966480446927, + "grad_norm": 0.8947067856788635, + "learning_rate": 5.726153524077144e-07, + "loss": 0.0815, + "step": 985 + }, + { + "epoch": 2.7653631284916202, + "grad_norm": 0.7308821082115173, + "learning_rate": 5.11112605663977e-07, + "loss": 0.1023, + "step": 990 + }, + { + "epoch": 2.7793296089385473, + "grad_norm": 0.824042022228241, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.0979, + "step": 995 + }, + { + "epoch": 2.793296089385475, + "grad_norm": 0.6149610877037048, + "learning_rate": 3.984291845896071e-07, + "loss": 0.0692, + "step": 1000 + }, + { + "epoch": 2.8072625698324023, + "grad_norm": 0.8072413802146912, + "learning_rate": 3.472752336059898e-07, + "loss": 0.0909, + "step": 1005 + }, + { + "epoch": 2.82122905027933, + "grad_norm": 0.6236807107925415, + "learning_rate": 2.995962378985223e-07, + "loss": 0.0799, + "step": 1010 + }, + { + "epoch": 2.835195530726257, + "grad_norm": 0.8194345235824585, + "learning_rate": 2.5540350474147324e-07, + "loss": 0.1, + "step": 1015 + }, + { + "epoch": 2.8491620111731844, + "grad_norm": 0.9429143667221069, + "learning_rate": 2.1470751462729143e-07, + "loss": 0.097, + "step": 1020 + }, + { + "epoch": 2.863128491620112, + "grad_norm": 0.8199458718299866, + "learning_rate": 1.7751791878110933e-07, + "loss": 0.086, + "step": 1025 + }, + { + "epoch": 2.877094972067039, + "grad_norm": 0.9080390334129333, + "learning_rate": 1.4384353687192376e-07, + "loss": 0.1032, + "step": 1030 + }, + { + "epoch": 2.8910614525139664, + "grad_norm": 0.8496189713478088, + "learning_rate": 1.1369235492096397e-07, + "loss": 0.1053, + "step": 1035 + }, + { + "epoch": 2.905027932960894, + "grad_norm": 0.7269903421401978, + "learning_rate": 8.707152340778346e-08, + "loss": 0.0904, + "step": 1040 + }, + { + "epoch": 2.9189944134078214, + "grad_norm": 0.7761643528938293, + "learning_rate": 6.398735557448299e-08, + "loss": 0.1007, + "step": 1045 + }, + { + "epoch": 2.9329608938547485, + "grad_norm": 0.7777862548828125, + "learning_rate": 4.4445325928506584e-08, + "loss": 0.0813, + "step": 1050 + }, + { + "epoch": 2.946927374301676, + "grad_norm": 0.7220128774642944, + "learning_rate": 2.8450068944338436e-08, + "loss": 0.0889, + "step": 1055 + }, + { + "epoch": 2.9608938547486034, + "grad_norm": 0.7919196486473083, + "learning_rate": 1.6005377964413702e-08, + "loss": 0.0908, + "step": 1060 + }, + { + "epoch": 2.9748603351955305, + "grad_norm": 0.8679852485656738, + "learning_rate": 7.114204299511484e-09, + "loss": 0.0884, + "step": 1065 + }, + { + "epoch": 2.988826815642458, + "grad_norm": 0.6622107028961182, + "learning_rate": 1.7786565288463452e-09, + "loss": 0.0782, + "step": 1070 + }, + { + "epoch": 3.0, + "step": 1074, + "total_flos": 1.3653474920061993e+18, + "train_loss": 0.42329370426066093, + "train_runtime": 1085.1535, + "train_samples_per_second": 31.657, + "train_steps_per_second": 0.99 + } + ], + "logging_steps": 5, + "max_steps": 1074, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3653474920061993e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ed1c139c6a8ebe398b0bb731d86a692678d46b0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/10_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f222459b4c787a8babc4d872dfaa4511fcf158ad8b1e2630f925723fe87df93 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4a6ef97fd6299164de398256e97f813c4bd37563 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 11_128_e3_3e-5 + results: [] +--- + + + +# 11_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed7a55b45e8401edb2c34f8027a1c785bc3ff5c5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "o_proj", + "k_proj", + "down_proj", + "q_proj", + "v_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53ad8d3125c7a666f87be29d90ddc89dd3691f24 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ad15284b8cd2eacd2b5807aa5cac1f0c61ebe0e672aac23d0eed78246574e1 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ff4688b487f88b6565209c4357a3c5c62161e8d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.1998547264478904e+18, + "train_loss": 0.4352821594682233, + "train_runtime": 980.5017, + "train_samples": 10189, + "train_samples_per_second": 31.175, + "train_steps_per_second": 0.976 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ff4688b487f88b6565209c4357a3c5c62161e8d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.1998547264478904e+18, + "train_loss": 0.4352821594682233, + "train_runtime": 980.5017, + "train_samples": 10189, + "train_samples_per_second": 31.175, + "train_steps_per_second": 0.976 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..25565235123db80c9d027865fe5f87f2b51d2ed2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1380 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 957, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015698587127158554, + "grad_norm": 0.7272019386291504, + "learning_rate": 2.4999999999999998e-06, + "loss": 1.6019, + "step": 5 + }, + { + "epoch": 0.03139717425431711, + "grad_norm": 0.5829149484634399, + "learning_rate": 5.625e-06, + "loss": 1.5658, + "step": 10 + }, + { + "epoch": 0.04709576138147567, + "grad_norm": 0.5889654159545898, + "learning_rate": 8.750000000000001e-06, + "loss": 1.6002, + "step": 15 + }, + { + "epoch": 0.06279434850863422, + "grad_norm": 0.5121167302131653, + "learning_rate": 1.1874999999999999e-05, + "loss": 1.5073, + "step": 20 + }, + { + "epoch": 0.07849293563579278, + "grad_norm": 0.5146015286445618, + "learning_rate": 1.5e-05, + "loss": 1.5996, + "step": 25 + }, + { + "epoch": 0.09419152276295134, + "grad_norm": 0.4867265522480011, + "learning_rate": 1.8125e-05, + "loss": 1.4919, + "step": 30 + }, + { + "epoch": 0.10989010989010989, + "grad_norm": 0.49370303750038147, + "learning_rate": 2.125e-05, + "loss": 1.4731, + "step": 35 + }, + { + "epoch": 0.12558869701726844, + "grad_norm": 0.4852001368999481, + "learning_rate": 2.4375e-05, + "loss": 1.4968, + "step": 40 + }, + { + "epoch": 0.141287284144427, + "grad_norm": 0.4971102476119995, + "learning_rate": 2.75e-05, + "loss": 1.4455, + "step": 45 + }, + { + "epoch": 0.15698587127158556, + "grad_norm": 0.5377012491226196, + "learning_rate": 2.9999910415513213e-05, + "loss": 1.3808, + "step": 50 + }, + { + "epoch": 0.1726844583987441, + "grad_norm": 0.5773342251777649, + "learning_rate": 2.9996775070829517e-05, + "loss": 1.357, + "step": 55 + }, + { + "epoch": 0.18838304552590268, + "grad_norm": 0.5665014982223511, + "learning_rate": 2.9989161571799668e-05, + "loss": 1.3771, + "step": 60 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.6331230998039246, + "learning_rate": 2.997707219187402e-05, + "loss": 1.3503, + "step": 65 + }, + { + "epoch": 0.21978021978021978, + "grad_norm": 0.6242734789848328, + "learning_rate": 2.9960510541036e-05, + "loss": 1.3576, + "step": 70 + }, + { + "epoch": 0.23547880690737832, + "grad_norm": 0.6785939335823059, + "learning_rate": 2.993948156472409e-05, + "loss": 1.2952, + "step": 75 + }, + { + "epoch": 0.25117739403453687, + "grad_norm": 0.5967380404472351, + "learning_rate": 2.9913991542355115e-05, + "loss": 1.1975, + "step": 80 + }, + { + "epoch": 0.2668759811616955, + "grad_norm": 0.6207562685012817, + "learning_rate": 2.988404808544915e-05, + "loss": 1.1783, + "step": 85 + }, + { + "epoch": 0.282574568288854, + "grad_norm": 0.6685380935668945, + "learning_rate": 2.9849660135356648e-05, + "loss": 1.1626, + "step": 90 + }, + { + "epoch": 0.29827315541601257, + "grad_norm": 0.7447589635848999, + "learning_rate": 2.9810837960588506e-05, + "loss": 1.1551, + "step": 95 + }, + { + "epoch": 0.3139717425431711, + "grad_norm": 0.7206305265426636, + "learning_rate": 2.976759315374979e-05, + "loss": 1.1702, + "step": 100 + }, + { + "epoch": 0.32967032967032966, + "grad_norm": 0.7702519297599792, + "learning_rate": 2.97199386280781e-05, + "loss": 1.1182, + "step": 105 + }, + { + "epoch": 0.3453689167974882, + "grad_norm": 0.7851648330688477, + "learning_rate": 2.9667888613587565e-05, + "loss": 1.0523, + "step": 110 + }, + { + "epoch": 0.36106750392464676, + "grad_norm": 0.7640228867530823, + "learning_rate": 2.961145865281967e-05, + "loss": 1.0002, + "step": 115 + }, + { + "epoch": 0.37676609105180536, + "grad_norm": 0.9413154721260071, + "learning_rate": 2.9550665596202116e-05, + "loss": 1.0897, + "step": 120 + }, + { + "epoch": 0.3924646781789639, + "grad_norm": 0.9648019075393677, + "learning_rate": 2.948552759701715e-05, + "loss": 1.0083, + "step": 125 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 0.8464683890342712, + "learning_rate": 2.941606410598087e-05, + "loss": 0.9951, + "step": 130 + }, + { + "epoch": 0.423861852433281, + "grad_norm": 0.847690224647522, + "learning_rate": 2.9342295865435055e-05, + "loss": 1.0119, + "step": 135 + }, + { + "epoch": 0.43956043956043955, + "grad_norm": 1.0446066856384277, + "learning_rate": 2.926424490315338e-05, + "loss": 0.977, + "step": 140 + }, + { + "epoch": 0.4552590266875981, + "grad_norm": 0.9778227806091309, + "learning_rate": 2.9181934525763717e-05, + "loss": 0.911, + "step": 145 + }, + { + "epoch": 0.47095761381475665, + "grad_norm": 1.081539273262024, + "learning_rate": 2.9095389311788626e-05, + "loss": 0.8235, + "step": 150 + }, + { + "epoch": 0.48665620094191525, + "grad_norm": 1.0612655878067017, + "learning_rate": 2.900463510430598e-05, + "loss": 0.8675, + "step": 155 + }, + { + "epoch": 0.5023547880690737, + "grad_norm": 1.033350944519043, + "learning_rate": 2.8909699003232043e-05, + "loss": 0.8184, + "step": 160 + }, + { + "epoch": 0.5180533751962323, + "grad_norm": 0.9380264282226562, + "learning_rate": 2.8810609357229226e-05, + "loss": 0.8101, + "step": 165 + }, + { + "epoch": 0.533751962323391, + "grad_norm": 1.0123512744903564, + "learning_rate": 2.870739575524093e-05, + "loss": 0.8488, + "step": 170 + }, + { + "epoch": 0.5494505494505495, + "grad_norm": 1.0092380046844482, + "learning_rate": 2.8600089017656087e-05, + "loss": 0.8273, + "step": 175 + }, + { + "epoch": 0.565149136577708, + "grad_norm": 1.1826591491699219, + "learning_rate": 2.8488721187105934e-05, + "loss": 0.79, + "step": 180 + }, + { + "epoch": 0.5808477237048666, + "grad_norm": 1.0729703903198242, + "learning_rate": 2.8373325518895826e-05, + "loss": 0.7752, + "step": 185 + }, + { + "epoch": 0.5965463108320251, + "grad_norm": 1.2238399982452393, + "learning_rate": 2.8253936471074954e-05, + "loss": 0.7405, + "step": 190 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 1.1926252841949463, + "learning_rate": 2.81305896941469e-05, + "loss": 0.7067, + "step": 195 + }, + { + "epoch": 0.6279434850863422, + "grad_norm": 1.1798923015594482, + "learning_rate": 2.8003322020424126e-05, + "loss": 0.7312, + "step": 200 + }, + { + "epoch": 0.6436420722135008, + "grad_norm": 1.2327336072921753, + "learning_rate": 2.787217145302953e-05, + "loss": 0.6918, + "step": 205 + }, + { + "epoch": 0.6593406593406593, + "grad_norm": 1.131651759147644, + "learning_rate": 2.7737177154548442e-05, + "loss": 0.73, + "step": 210 + }, + { + "epoch": 0.6750392464678179, + "grad_norm": 1.0517631769180298, + "learning_rate": 2.7598379435334358e-05, + "loss": 0.7145, + "step": 215 + }, + { + "epoch": 0.6907378335949764, + "grad_norm": 1.191736102104187, + "learning_rate": 2.7455819741471965e-05, + "loss": 0.6485, + "step": 220 + }, + { + "epoch": 0.706436420722135, + "grad_norm": 1.3923006057739258, + "learning_rate": 2.7309540642400998e-05, + "loss": 0.664, + "step": 225 + }, + { + "epoch": 0.7221350078492935, + "grad_norm": 1.1946771144866943, + "learning_rate": 2.715958581820469e-05, + "loss": 0.6507, + "step": 230 + }, + { + "epoch": 0.7378335949764521, + "grad_norm": 1.2704203128814697, + "learning_rate": 2.7006000046566543e-05, + "loss": 0.6627, + "step": 235 + }, + { + "epoch": 0.7535321821036107, + "grad_norm": 1.0890724658966064, + "learning_rate": 2.684882918939937e-05, + "loss": 0.6239, + "step": 240 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.1511670351028442, + "learning_rate": 2.6688120179150563e-05, + "loss": 0.6214, + "step": 245 + }, + { + "epoch": 0.7849293563579278, + "grad_norm": 1.3642829656600952, + "learning_rate": 2.6523921004787707e-05, + "loss": 0.6053, + "step": 250 + }, + { + "epoch": 0.8006279434850864, + "grad_norm": 1.201777696609497, + "learning_rate": 2.635628069746869e-05, + "loss": 0.6212, + "step": 255 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 1.2050590515136719, + "learning_rate": 2.6185249315900625e-05, + "loss": 0.5713, + "step": 260 + }, + { + "epoch": 0.8320251177394035, + "grad_norm": 1.1394777297973633, + "learning_rate": 2.6010877931391905e-05, + "loss": 0.594, + "step": 265 + }, + { + "epoch": 0.847723704866562, + "grad_norm": 1.7931807041168213, + "learning_rate": 2.5833218612601937e-05, + "loss": 0.5684, + "step": 270 + }, + { + "epoch": 0.8634222919937206, + "grad_norm": 1.4945976734161377, + "learning_rate": 2.5652324409993034e-05, + "loss": 0.5227, + "step": 275 + }, + { + "epoch": 0.8791208791208791, + "grad_norm": 1.115197777748108, + "learning_rate": 2.546824933998911e-05, + "loss": 0.4768, + "step": 280 + }, + { + "epoch": 0.8948194662480377, + "grad_norm": 1.5279356241226196, + "learning_rate": 2.5281048368845964e-05, + "loss": 0.5873, + "step": 285 + }, + { + "epoch": 0.9105180533751962, + "grad_norm": 1.239617943763733, + "learning_rate": 2.509077739623793e-05, + "loss": 0.5082, + "step": 290 + }, + { + "epoch": 0.9262166405023547, + "grad_norm": 1.242232322692871, + "learning_rate": 2.48974932385658e-05, + "loss": 0.4894, + "step": 295 + }, + { + "epoch": 0.9419152276295133, + "grad_norm": 1.165049433708191, + "learning_rate": 2.470125361199099e-05, + "loss": 0.5034, + "step": 300 + }, + { + "epoch": 0.957613814756672, + "grad_norm": 1.8544493913650513, + "learning_rate": 2.4502117115201048e-05, + "loss": 0.4778, + "step": 305 + }, + { + "epoch": 0.9733124018838305, + "grad_norm": 1.2787054777145386, + "learning_rate": 2.430014321191163e-05, + "loss": 0.4889, + "step": 310 + }, + { + "epoch": 0.989010989010989, + "grad_norm": 1.208701491355896, + "learning_rate": 2.4095392213110167e-05, + "loss": 0.5373, + "step": 315 + }, + { + "epoch": 1.0031397174254317, + "grad_norm": 1.3892751932144165, + "learning_rate": 2.3887925259046542e-05, + "loss": 0.5315, + "step": 320 + }, + { + "epoch": 1.0188383045525902, + "grad_norm": 1.4144299030303955, + "learning_rate": 2.3677804300976112e-05, + "loss": 0.432, + "step": 325 + }, + { + "epoch": 1.0345368916797488, + "grad_norm": 1.3491207361221313, + "learning_rate": 2.3465092082660595e-05, + "loss": 0.4322, + "step": 330 + }, + { + "epoch": 1.0502354788069075, + "grad_norm": 1.370428442955017, + "learning_rate": 2.3249852121632293e-05, + "loss": 0.3771, + "step": 335 + }, + { + "epoch": 1.065934065934066, + "grad_norm": 1.3499157428741455, + "learning_rate": 2.303214869022725e-05, + "loss": 0.4273, + "step": 340 + }, + { + "epoch": 1.0816326530612246, + "grad_norm": 1.3882744312286377, + "learning_rate": 2.2812046796393032e-05, + "loss": 0.3854, + "step": 345 + }, + { + "epoch": 1.097331240188383, + "grad_norm": 1.1511824131011963, + "learning_rate": 2.258961216427686e-05, + "loss": 0.3374, + "step": 350 + }, + { + "epoch": 1.1130298273155417, + "grad_norm": 1.248146891593933, + "learning_rate": 2.2364911214599832e-05, + "loss": 0.3791, + "step": 355 + }, + { + "epoch": 1.1287284144427001, + "grad_norm": 1.360170602798462, + "learning_rate": 2.2138011044823226e-05, + "loss": 0.398, + "step": 360 + }, + { + "epoch": 1.1444270015698588, + "grad_norm": 1.3613286018371582, + "learning_rate": 2.190897940911262e-05, + "loss": 0.3493, + "step": 365 + }, + { + "epoch": 1.1601255886970172, + "grad_norm": 1.081156849861145, + "learning_rate": 2.1677884698106006e-05, + "loss": 0.3137, + "step": 370 + }, + { + "epoch": 1.1758241758241759, + "grad_norm": 1.3652514219284058, + "learning_rate": 2.1444795918491796e-05, + "loss": 0.3646, + "step": 375 + }, + { + "epoch": 1.1915227629513343, + "grad_norm": 1.5053037405014038, + "learning_rate": 2.1209782672402877e-05, + "loss": 0.3418, + "step": 380 + }, + { + "epoch": 1.207221350078493, + "grad_norm": 1.2264224290847778, + "learning_rate": 2.0972915136632894e-05, + "loss": 0.3516, + "step": 385 + }, + { + "epoch": 1.2229199372056514, + "grad_norm": 1.5475026369094849, + "learning_rate": 2.0734264041680886e-05, + "loss": 0.3066, + "step": 390 + }, + { + "epoch": 1.23861852433281, + "grad_norm": 1.2828965187072754, + "learning_rate": 2.049390065063062e-05, + "loss": 0.3407, + "step": 395 + }, + { + "epoch": 1.2543171114599687, + "grad_norm": 1.3047906160354614, + "learning_rate": 2.0251896737870862e-05, + "loss": 0.3482, + "step": 400 + }, + { + "epoch": 1.2700156985871272, + "grad_norm": 1.344680666923523, + "learning_rate": 2.000832456766301e-05, + "loss": 0.308, + "step": 405 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.9050642251968384, + "learning_rate": 1.976325687256239e-05, + "loss": 0.3373, + "step": 410 + }, + { + "epoch": 1.3014128728414442, + "grad_norm": 1.2462161779403687, + "learning_rate": 1.9516766831699767e-05, + "loss": 0.3074, + "step": 415 + }, + { + "epoch": 1.317111459968603, + "grad_norm": 1.2117626667022705, + "learning_rate": 1.9268928048929476e-05, + "loss": 0.2953, + "step": 420 + }, + { + "epoch": 1.3328100470957613, + "grad_norm": 1.3953559398651123, + "learning_rate": 1.9019814530850722e-05, + "loss": 0.3262, + "step": 425 + }, + { + "epoch": 1.34850863422292, + "grad_norm": 1.286778450012207, + "learning_rate": 1.8769500664708665e-05, + "loss": 0.3296, + "step": 430 + }, + { + "epoch": 1.3642072213500784, + "grad_norm": 1.4048824310302734, + "learning_rate": 1.851806119618178e-05, + "loss": 0.2995, + "step": 435 + }, + { + "epoch": 1.379905808477237, + "grad_norm": 1.3122953176498413, + "learning_rate": 1.826557120706221e-05, + "loss": 0.2888, + "step": 440 + }, + { + "epoch": 1.3956043956043955, + "grad_norm": 1.6975677013397217, + "learning_rate": 1.801210609283578e-05, + "loss": 0.3092, + "step": 445 + }, + { + "epoch": 1.4113029827315542, + "grad_norm": 1.2934538125991821, + "learning_rate": 1.7757741540168276e-05, + "loss": 0.2971, + "step": 450 + }, + { + "epoch": 1.4270015698587128, + "grad_norm": 1.5370351076126099, + "learning_rate": 1.750255350430487e-05, + "loss": 0.2646, + "step": 455 + }, + { + "epoch": 1.4427001569858713, + "grad_norm": 1.216896653175354, + "learning_rate": 1.7246618186389225e-05, + "loss": 0.2733, + "step": 460 + }, + { + "epoch": 1.4583987441130297, + "grad_norm": 1.4258239269256592, + "learning_rate": 1.6990012010709293e-05, + "loss": 0.2724, + "step": 465 + }, + { + "epoch": 1.4740973312401884, + "grad_norm": 1.2620471715927124, + "learning_rate": 1.6732811601876395e-05, + "loss": 0.274, + "step": 470 + }, + { + "epoch": 1.489795918367347, + "grad_norm": 1.3154317140579224, + "learning_rate": 1.6475093761944522e-05, + "loss": 0.2466, + "step": 475 + }, + { + "epoch": 1.5054945054945055, + "grad_norm": 1.2884193658828735, + "learning_rate": 1.6216935447476633e-05, + "loss": 0.2736, + "step": 480 + }, + { + "epoch": 1.521193092621664, + "grad_norm": 1.3418302536010742, + "learning_rate": 1.5958413746564833e-05, + "loss": 0.2759, + "step": 485 + }, + { + "epoch": 1.5368916797488226, + "grad_norm": 1.376930832862854, + "learning_rate": 1.5699605855811263e-05, + "loss": 0.2471, + "step": 490 + }, + { + "epoch": 1.5525902668759812, + "grad_norm": 1.3898321390151978, + "learning_rate": 1.5440589057276587e-05, + "loss": 0.2659, + "step": 495 + }, + { + "epoch": 1.5682888540031397, + "grad_norm": 1.2682690620422363, + "learning_rate": 1.5181440695402963e-05, + "loss": 0.2469, + "step": 500 + }, + { + "epoch": 1.5839874411302983, + "grad_norm": 1.2689883708953857, + "learning_rate": 1.4922238153918409e-05, + "loss": 0.2281, + "step": 505 + }, + { + "epoch": 1.599686028257457, + "grad_norm": 1.2716364860534668, + "learning_rate": 1.4663058832729368e-05, + "loss": 0.2199, + "step": 510 + }, + { + "epoch": 1.6153846153846154, + "grad_norm": 1.4238063097000122, + "learning_rate": 1.4403980124808546e-05, + "loss": 0.224, + "step": 515 + }, + { + "epoch": 1.6310832025117739, + "grad_norm": 1.3630428314208984, + "learning_rate": 1.414507939308473e-05, + "loss": 0.2287, + "step": 520 + }, + { + "epoch": 1.6467817896389325, + "grad_norm": 1.2887314558029175, + "learning_rate": 1.3886433947341626e-05, + "loss": 0.228, + "step": 525 + }, + { + "epoch": 1.6624803767660912, + "grad_norm": 1.4318568706512451, + "learning_rate": 1.3628121021132552e-05, + "loss": 0.2523, + "step": 530 + }, + { + "epoch": 1.6781789638932496, + "grad_norm": 1.4085540771484375, + "learning_rate": 1.3370217748717882e-05, + "loss": 0.2048, + "step": 535 + }, + { + "epoch": 1.693877551020408, + "grad_norm": 1.2988741397857666, + "learning_rate": 1.3112801142032169e-05, + "loss": 0.1955, + "step": 540 + }, + { + "epoch": 1.7095761381475667, + "grad_norm": 1.3304824829101562, + "learning_rate": 1.2855948067687776e-05, + "loss": 0.1931, + "step": 545 + }, + { + "epoch": 1.7252747252747254, + "grad_norm": 1.2239878177642822, + "learning_rate": 1.2599735224021893e-05, + "loss": 0.2037, + "step": 550 + }, + { + "epoch": 1.7409733124018838, + "grad_norm": 1.1645827293395996, + "learning_rate": 1.2344239118193825e-05, + "loss": 0.1689, + "step": 555 + }, + { + "epoch": 1.7566718995290422, + "grad_norm": 1.1807641983032227, + "learning_rate": 1.2089536043339352e-05, + "loss": 0.2031, + "step": 560 + }, + { + "epoch": 1.772370486656201, + "grad_norm": 1.188545823097229, + "learning_rate": 1.1835702055789005e-05, + "loss": 0.1756, + "step": 565 + }, + { + "epoch": 1.7880690737833596, + "grad_norm": 1.232046365737915, + "learning_rate": 1.1582812952357039e-05, + "loss": 0.2011, + "step": 570 + }, + { + "epoch": 1.803767660910518, + "grad_norm": 1.476468563079834, + "learning_rate": 1.1330944247707935e-05, + "loss": 0.1929, + "step": 575 + }, + { + "epoch": 1.8194662480376766, + "grad_norm": 1.2453932762145996, + "learning_rate": 1.1080171151807095e-05, + "loss": 0.1633, + "step": 580 + }, + { + "epoch": 1.8351648351648353, + "grad_norm": 1.222991943359375, + "learning_rate": 1.0830568547462595e-05, + "loss": 0.1632, + "step": 585 + }, + { + "epoch": 1.8508634222919937, + "grad_norm": 1.2895456552505493, + "learning_rate": 1.0582210967964569e-05, + "loss": 0.2065, + "step": 590 + }, + { + "epoch": 1.8665620094191522, + "grad_norm": 1.0879322290420532, + "learning_rate": 1.0335172574828992e-05, + "loss": 0.1767, + "step": 595 + }, + { + "epoch": 1.8822605965463108, + "grad_norm": 1.2178689241409302, + "learning_rate": 1.0089527135652483e-05, + "loss": 0.1715, + "step": 600 + }, + { + "epoch": 1.8979591836734695, + "grad_norm": 1.1117581129074097, + "learning_rate": 9.8453480020847e-06, + "loss": 0.1755, + "step": 605 + }, + { + "epoch": 1.913657770800628, + "grad_norm": 1.2283244132995605, + "learning_rate": 9.602708087924985e-06, + "loss": 0.1741, + "step": 610 + }, + { + "epoch": 1.9293563579277864, + "grad_norm": 1.2443703413009644, + "learning_rate": 9.361679847349715e-06, + "loss": 0.1636, + "step": 615 + }, + { + "epoch": 1.945054945054945, + "grad_norm": 1.158144235610962, + "learning_rate": 9.122335253276932e-06, + "loss": 0.1815, + "step": 620 + }, + { + "epoch": 1.9607535321821037, + "grad_norm": 1.3021451234817505, + "learning_rate": 8.88474577587467e-06, + "loss": 0.192, + "step": 625 + }, + { + "epoch": 1.9764521193092621, + "grad_norm": 1.3853386640548706, + "learning_rate": 8.648982361219381e-06, + "loss": 0.1868, + "step": 630 + }, + { + "epoch": 1.9921507064364206, + "grad_norm": 1.1720635890960693, + "learning_rate": 8.415115410110908e-06, + "loss": 0.1608, + "step": 635 + }, + { + "epoch": 2.0062794348508635, + "grad_norm": 1.1345287561416626, + "learning_rate": 8.183214757050242e-06, + "loss": 0.1513, + "step": 640 + }, + { + "epoch": 2.021978021978022, + "grad_norm": 1.0394654273986816, + "learning_rate": 7.95334964938639e-06, + "loss": 0.1231, + "step": 645 + }, + { + "epoch": 2.0376766091051803, + "grad_norm": 0.9559608697891235, + "learning_rate": 7.725588726638591e-06, + "loss": 0.1427, + "step": 650 + }, + { + "epoch": 2.053375196232339, + "grad_norm": 1.1492160558700562, + "learning_rate": 7.500000000000004e-06, + "loss": 0.1273, + "step": 655 + }, + { + "epoch": 2.0690737833594977, + "grad_norm": 1.0703526735305786, + "learning_rate": 7.276650832029032e-06, + "loss": 0.1314, + "step": 660 + }, + { + "epoch": 2.084772370486656, + "grad_norm": 1.0511596202850342, + "learning_rate": 7.0556079165343475e-06, + "loss": 0.1142, + "step": 665 + }, + { + "epoch": 2.100470957613815, + "grad_norm": 1.1839357614517212, + "learning_rate": 6.836937258659589e-06, + "loss": 0.1296, + "step": 670 + }, + { + "epoch": 2.1161695447409734, + "grad_norm": 1.0072215795516968, + "learning_rate": 6.620704155173724e-06, + "loss": 0.1513, + "step": 675 + }, + { + "epoch": 2.131868131868132, + "grad_norm": 1.2412188053131104, + "learning_rate": 6.406973174972901e-06, + "loss": 0.1307, + "step": 680 + }, + { + "epoch": 2.1475667189952903, + "grad_norm": 0.9480654001235962, + "learning_rate": 6.1958081397996855e-06, + "loss": 0.1057, + "step": 685 + }, + { + "epoch": 2.163265306122449, + "grad_norm": 1.0926275253295898, + "learning_rate": 5.987272105185388e-06, + "loss": 0.128, + "step": 690 + }, + { + "epoch": 2.1789638932496076, + "grad_norm": 0.8243603706359863, + "learning_rate": 5.781427341621183e-06, + "loss": 0.1142, + "step": 695 + }, + { + "epoch": 2.194662480376766, + "grad_norm": 1.1897200345993042, + "learning_rate": 5.5783353159636625e-06, + "loss": 0.1262, + "step": 700 + }, + { + "epoch": 2.2103610675039245, + "grad_norm": 1.1367827653884888, + "learning_rate": 5.3780566730803445e-06, + "loss": 0.1161, + "step": 705 + }, + { + "epoch": 2.2260596546310834, + "grad_norm": 0.9297449588775635, + "learning_rate": 5.180651217740642e-06, + "loss": 0.107, + "step": 710 + }, + { + "epoch": 2.241758241758242, + "grad_norm": 1.282331109046936, + "learning_rate": 4.986177896757696e-06, + "loss": 0.1019, + "step": 715 + }, + { + "epoch": 2.2574568288854002, + "grad_norm": 0.8150553703308105, + "learning_rate": 4.794694781386393e-06, + "loss": 0.1076, + "step": 720 + }, + { + "epoch": 2.2731554160125587, + "grad_norm": 1.0080894231796265, + "learning_rate": 4.606259049982822e-06, + "loss": 0.1137, + "step": 725 + }, + { + "epoch": 2.2888540031397175, + "grad_norm": 0.9914476275444031, + "learning_rate": 4.420926970930394e-06, + "loss": 0.0976, + "step": 730 + }, + { + "epoch": 2.304552590266876, + "grad_norm": 1.092005729675293, + "learning_rate": 4.23875388583765e-06, + "loss": 0.1278, + "step": 735 + }, + { + "epoch": 2.3202511773940344, + "grad_norm": 0.9444569945335388, + "learning_rate": 4.059794193012813e-06, + "loss": 0.1169, + "step": 740 + }, + { + "epoch": 2.3359497645211933, + "grad_norm": 0.9480182528495789, + "learning_rate": 3.884101331220049e-06, + "loss": 0.1045, + "step": 745 + }, + { + "epoch": 2.3516483516483517, + "grad_norm": 1.1857447624206543, + "learning_rate": 3.7117277637222235e-06, + "loss": 0.0946, + "step": 750 + }, + { + "epoch": 2.36734693877551, + "grad_norm": 0.7603578567504883, + "learning_rate": 3.5427249626149545e-06, + "loss": 0.1117, + "step": 755 + }, + { + "epoch": 2.3830455259026686, + "grad_norm": 0.8697259426116943, + "learning_rate": 3.3771433934566532e-06, + "loss": 0.0912, + "step": 760 + }, + { + "epoch": 2.3987441130298275, + "grad_norm": 0.84373939037323, + "learning_rate": 3.215032500199106e-06, + "loss": 0.0985, + "step": 765 + }, + { + "epoch": 2.414442700156986, + "grad_norm": 0.8262746334075928, + "learning_rate": 3.056440690423127e-06, + "loss": 0.0917, + "step": 770 + }, + { + "epoch": 2.4301412872841444, + "grad_norm": 0.8675354719161987, + "learning_rate": 2.9014153208836764e-06, + "loss": 0.1059, + "step": 775 + }, + { + "epoch": 2.445839874411303, + "grad_norm": 0.8028172850608826, + "learning_rate": 2.750002683368761e-06, + "loss": 0.0918, + "step": 780 + }, + { + "epoch": 2.4615384615384617, + "grad_norm": 0.9062762260437012, + "learning_rate": 2.602247990876346e-06, + "loss": 0.0924, + "step": 785 + }, + { + "epoch": 2.47723704866562, + "grad_norm": 0.8460918664932251, + "learning_rate": 2.4581953641133975e-06, + "loss": 0.0857, + "step": 790 + }, + { + "epoch": 2.4929356357927785, + "grad_norm": 0.9238728284835815, + "learning_rate": 2.3178878183210966e-06, + "loss": 0.0982, + "step": 795 + }, + { + "epoch": 2.5086342229199374, + "grad_norm": 0.7576223015785217, + "learning_rate": 2.181367250430153e-06, + "loss": 0.1043, + "step": 800 + }, + { + "epoch": 2.524332810047096, + "grad_norm": 0.8626974821090698, + "learning_rate": 2.0486744265500477e-06, + "loss": 0.112, + "step": 805 + }, + { + "epoch": 2.5400313971742543, + "grad_norm": 0.8985633254051208, + "learning_rate": 1.9198489697959635e-06, + "loss": 0.0924, + "step": 810 + }, + { + "epoch": 2.5557299843014127, + "grad_norm": 0.8535839319229126, + "learning_rate": 1.7949293484570045e-06, + "loss": 0.116, + "step": 815 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 0.9583487510681152, + "learning_rate": 1.6739528645092666e-06, + "loss": 0.1002, + "step": 820 + }, + { + "epoch": 2.58712715855573, + "grad_norm": 0.8748232126235962, + "learning_rate": 1.556955642477177e-06, + "loss": 0.0936, + "step": 825 + }, + { + "epoch": 2.6028257456828885, + "grad_norm": 0.6575373411178589, + "learning_rate": 1.4439726186464264e-06, + "loss": 0.0887, + "step": 830 + }, + { + "epoch": 2.618524332810047, + "grad_norm": 0.7931586503982544, + "learning_rate": 1.335037530631727e-06, + "loss": 0.1015, + "step": 835 + }, + { + "epoch": 2.634222919937206, + "grad_norm": 0.7339344620704651, + "learning_rate": 1.2301829073024833e-06, + "loss": 0.1023, + "step": 840 + }, + { + "epoch": 2.6499215070643642, + "grad_norm": 0.7151268720626831, + "learning_rate": 1.1294400590694348e-06, + "loss": 0.0818, + "step": 845 + }, + { + "epoch": 2.6656200941915227, + "grad_norm": 0.857877790927887, + "learning_rate": 1.0328390685351085e-06, + "loss": 0.0743, + "step": 850 + }, + { + "epoch": 2.6813186813186816, + "grad_norm": 0.7997430562973022, + "learning_rate": 9.404087815109241e-07, + "loss": 0.0827, + "step": 855 + }, + { + "epoch": 2.69701726844584, + "grad_norm": 0.7026591300964355, + "learning_rate": 8.521767984036083e-07, + "loss": 0.1051, + "step": 860 + }, + { + "epoch": 2.7127158555729984, + "grad_norm": 0.8101130127906799, + "learning_rate": 7.681694659734995e-07, + "loss": 0.0883, + "step": 865 + }, + { + "epoch": 2.728414442700157, + "grad_norm": 0.7735944390296936, + "learning_rate": 6.88411869467196e-07, + "loss": 0.0853, + "step": 870 + }, + { + "epoch": 2.7441130298273153, + "grad_norm": 0.6088822484016418, + "learning_rate": 6.12927825126906e-07, + "loss": 0.1053, + "step": 875 + }, + { + "epoch": 2.759811616954474, + "grad_norm": 0.9550263285636902, + "learning_rate": 5.417398730787321e-07, + "loss": 0.0979, + "step": 880 + }, + { + "epoch": 2.7755102040816326, + "grad_norm": 0.7818852066993713, + "learning_rate": 4.748692706020125e-07, + "loss": 0.0855, + "step": 885 + }, + { + "epoch": 2.791208791208791, + "grad_norm": 0.7133302092552185, + "learning_rate": 4.1233598578173104e-07, + "loss": 0.0828, + "step": 890 + }, + { + "epoch": 2.80690737833595, + "grad_norm": 0.8189200758934021, + "learning_rate": 3.5415869154589495e-07, + "loss": 0.0911, + "step": 895 + }, + { + "epoch": 2.8226059654631084, + "grad_norm": 0.69682776927948, + "learning_rate": 3.0035476008964337e-07, + "loss": 0.1037, + "step": 900 + }, + { + "epoch": 2.838304552590267, + "grad_norm": 0.6839686036109924, + "learning_rate": 2.5094025768778506e-07, + "loss": 0.0859, + "step": 905 + }, + { + "epoch": 2.8540031397174257, + "grad_norm": 0.7379226088523865, + "learning_rate": 2.0592993989727305e-07, + "loss": 0.1048, + "step": 910 + }, + { + "epoch": 2.869701726844584, + "grad_norm": 0.7856185436248779, + "learning_rate": 1.6533724715108555e-07, + "loss": 0.1111, + "step": 915 + }, + { + "epoch": 2.8854003139717426, + "grad_norm": 3.2246499061584473, + "learning_rate": 1.2917430074480819e-07, + "loss": 0.0889, + "step": 920 + }, + { + "epoch": 2.901098901098901, + "grad_norm": 0.8067370653152466, + "learning_rate": 9.745189921711583e-08, + "loss": 0.0987, + "step": 925 + }, + { + "epoch": 2.9167974882260594, + "grad_norm": 0.8602482080459595, + "learning_rate": 7.017951512524879e-08, + "loss": 0.0892, + "step": 930 + }, + { + "epoch": 2.9324960753532183, + "grad_norm": 0.6953303217887878, + "learning_rate": 4.7365292216431934e-08, + "loss": 0.0833, + "step": 935 + }, + { + "epoch": 2.9481946624803768, + "grad_norm": 0.7006462216377258, + "learning_rate": 2.9016042996085067e-08, + "loss": 0.1059, + "step": 940 + }, + { + "epoch": 2.963893249607535, + "grad_norm": 0.715408444404602, + "learning_rate": 1.513724669356009e-08, + "loss": 0.0986, + "step": 945 + }, + { + "epoch": 2.979591836734694, + "grad_norm": 0.7994702458381653, + "learning_rate": 5.733047625998134e-09, + "loss": 0.1018, + "step": 950 + }, + { + "epoch": 2.9952904238618525, + "grad_norm": 0.7164477109909058, + "learning_rate": 8.062539608028052e-10, + "loss": 0.0875, + "step": 955 + }, + { + "epoch": 3.0, + "step": 957, + "total_flos": 1.1998547264478904e+18, + "train_loss": 0.4352821594682233, + "train_runtime": 980.5017, + "train_samples_per_second": 31.175, + "train_steps_per_second": 0.976 + } + ], + "logging_steps": 5, + "max_steps": 957, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1998547264478904e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b52634e9d1e88e3edd7367dae8638165a101a76b --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/11_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f1dfcdbf5b11690a890c51237b4518333cd6e3ede6e2138074ad99fe0d98ee +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d510b7dbc1a3bfd85ad8da2345172314cef185d9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 12_128_e3_3e-5 + results: [] +--- + + + +# 12_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..701f2f78ae42a1f85c22e15efa474029c1baec05 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "down_proj", + "v_proj", + "gate_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4c8a619e1c49bd21273c51c5415fc5609e80f92 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cdb61eac68fa6de9f1f87ba7852e235abd24e46bb967bfdde45571ea799fd83 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..09521db0f70b33cb43da3ea0c2fc3402f954ebcf --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2584939702378824e+18, + "train_loss": 0.4064867097967684, + "train_runtime": 1021.6736, + "train_samples": 10970, + "train_samples_per_second": 32.212, + "train_steps_per_second": 1.007 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..09521db0f70b33cb43da3ea0c2fc3402f954ebcf --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2584939702378824e+18, + "train_loss": 0.4064867097967684, + "train_runtime": 1021.6736, + "train_samples": 10970, + "train_samples_per_second": 32.212, + "train_steps_per_second": 1.007 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..04f3da6b62ceb7650db81f3d6ec771bd204e289e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1478 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1029, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014577259475218658, + "grad_norm": 0.7953269481658936, + "learning_rate": 2.307692307692308e-06, + "loss": 1.4931, + "step": 5 + }, + { + "epoch": 0.029154518950437316, + "grad_norm": 0.5668715834617615, + "learning_rate": 5.192307692307692e-06, + "loss": 1.5461, + "step": 10 + }, + { + "epoch": 0.043731778425655975, + "grad_norm": 0.7700757384300232, + "learning_rate": 8.076923076923077e-06, + "loss": 1.5557, + "step": 15 + }, + { + "epoch": 0.05830903790087463, + "grad_norm": 0.5006047487258911, + "learning_rate": 1.0961538461538462e-05, + "loss": 1.5317, + "step": 20 + }, + { + "epoch": 0.0728862973760933, + "grad_norm": 0.5654619336128235, + "learning_rate": 1.3846153846153847e-05, + "loss": 1.4736, + "step": 25 + }, + { + "epoch": 0.08746355685131195, + "grad_norm": 0.5280630588531494, + "learning_rate": 1.673076923076923e-05, + "loss": 1.4661, + "step": 30 + }, + { + "epoch": 0.10204081632653061, + "grad_norm": 0.4592368006706238, + "learning_rate": 1.9615384615384617e-05, + "loss": 1.4109, + "step": 35 + }, + { + "epoch": 0.11661807580174927, + "grad_norm": 0.5890766978263855, + "learning_rate": 2.25e-05, + "loss": 1.4162, + "step": 40 + }, + { + "epoch": 0.13119533527696792, + "grad_norm": 0.5005080699920654, + "learning_rate": 2.5384615384615386e-05, + "loss": 1.3989, + "step": 45 + }, + { + "epoch": 0.1457725947521866, + "grad_norm": 0.5818098783493042, + "learning_rate": 2.8269230769230768e-05, + "loss": 1.4116, + "step": 50 + }, + { + "epoch": 0.16034985422740525, + "grad_norm": 0.578619658946991, + "learning_rate": 2.999968980815508e-05, + "loss": 1.3233, + "step": 55 + }, + { + "epoch": 0.1749271137026239, + "grad_norm": 0.5452526807785034, + "learning_rate": 2.999620029723336e-05, + "loss": 1.2956, + "step": 60 + }, + { + "epoch": 0.18950437317784258, + "grad_norm": 0.5833545327186584, + "learning_rate": 2.998883444058916e-05, + "loss": 1.2188, + "step": 65 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.6610287427902222, + "learning_rate": 2.9977594142211905e-05, + "loss": 1.3004, + "step": 70 + }, + { + "epoch": 0.21865889212827988, + "grad_norm": 0.6509256958961487, + "learning_rate": 2.996248230758977e-05, + "loss": 1.2807, + "step": 75 + }, + { + "epoch": 0.23323615160349853, + "grad_norm": 0.6828907132148743, + "learning_rate": 2.9943502842958625e-05, + "loss": 1.1782, + "step": 80 + }, + { + "epoch": 0.2478134110787172, + "grad_norm": 0.6541049480438232, + "learning_rate": 2.992066065429232e-05, + "loss": 1.1719, + "step": 85 + }, + { + "epoch": 0.26239067055393583, + "grad_norm": 0.6864221096038818, + "learning_rate": 2.989396164603455e-05, + "loss": 1.171, + "step": 90 + }, + { + "epoch": 0.27696793002915454, + "grad_norm": 0.6274526715278625, + "learning_rate": 2.986341271957263e-05, + "loss": 1.121, + "step": 95 + }, + { + "epoch": 0.2915451895043732, + "grad_norm": 0.7996557950973511, + "learning_rate": 2.9829021771453552e-05, + "loss": 1.1474, + "step": 100 + }, + { + "epoch": 0.30612244897959184, + "grad_norm": 0.8907520174980164, + "learning_rate": 2.9790797691342838e-05, + "loss": 1.0564, + "step": 105 + }, + { + "epoch": 0.3206997084548105, + "grad_norm": 0.7863233685493469, + "learning_rate": 2.9748750359726635e-05, + "loss": 1.041, + "step": 110 + }, + { + "epoch": 0.33527696793002915, + "grad_norm": 0.7840473651885986, + "learning_rate": 2.9702890645357753e-05, + "loss": 1.0358, + "step": 115 + }, + { + "epoch": 0.3498542274052478, + "grad_norm": 0.7027601003646851, + "learning_rate": 2.9653230402446202e-05, + "loss": 0.9685, + "step": 120 + }, + { + "epoch": 0.36443148688046645, + "grad_norm": 0.8280933499336243, + "learning_rate": 2.959978246759501e-05, + "loss": 0.9782, + "step": 125 + }, + { + "epoch": 0.37900874635568516, + "grad_norm": 0.9119365811347961, + "learning_rate": 2.9542560656482116e-05, + "loss": 0.9512, + "step": 130 + }, + { + "epoch": 0.3935860058309038, + "grad_norm": 0.8328577876091003, + "learning_rate": 2.9481579760289176e-05, + "loss": 0.9782, + "step": 135 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0267574787139893, + "learning_rate": 2.9416855541878215e-05, + "loss": 0.8816, + "step": 140 + }, + { + "epoch": 0.4227405247813411, + "grad_norm": 0.9783591032028198, + "learning_rate": 2.9348404731717092e-05, + "loss": 0.893, + "step": 145 + }, + { + "epoch": 0.43731778425655976, + "grad_norm": 1.0068713426589966, + "learning_rate": 2.9276245023554888e-05, + "loss": 0.9091, + "step": 150 + }, + { + "epoch": 0.4518950437317784, + "grad_norm": 0.9492970108985901, + "learning_rate": 2.9200395069848253e-05, + "loss": 0.8827, + "step": 155 + }, + { + "epoch": 0.46647230320699706, + "grad_norm": 1.0323827266693115, + "learning_rate": 2.912087447693999e-05, + "loss": 0.8229, + "step": 160 + }, + { + "epoch": 0.48104956268221577, + "grad_norm": 0.9389294981956482, + "learning_rate": 2.9037703799991007e-05, + "loss": 0.7912, + "step": 165 + }, + { + "epoch": 0.4956268221574344, + "grad_norm": 0.9428365230560303, + "learning_rate": 2.8950904537667057e-05, + "loss": 0.8478, + "step": 170 + }, + { + "epoch": 0.5102040816326531, + "grad_norm": 0.9541400671005249, + "learning_rate": 2.8860499126581593e-05, + "loss": 0.7829, + "step": 175 + }, + { + "epoch": 0.5247813411078717, + "grad_norm": 1.0147961378097534, + "learning_rate": 2.876651093549613e-05, + "loss": 0.7811, + "step": 180 + }, + { + "epoch": 0.5393586005830904, + "grad_norm": 1.034030795097351, + "learning_rate": 2.866896425927973e-05, + "loss": 0.6729, + "step": 185 + }, + { + "epoch": 0.5539358600583091, + "grad_norm": 1.2523123025894165, + "learning_rate": 2.8567884312629023e-05, + "loss": 0.7767, + "step": 190 + }, + { + "epoch": 0.5685131195335277, + "grad_norm": 1.0398420095443726, + "learning_rate": 2.8463297223550495e-05, + "loss": 0.7148, + "step": 195 + }, + { + "epoch": 0.5830903790087464, + "grad_norm": 1.0629414319992065, + "learning_rate": 2.8355230026606743e-05, + "loss": 0.7157, + "step": 200 + }, + { + "epoch": 0.597667638483965, + "grad_norm": 1.0651006698608398, + "learning_rate": 2.8243710655928287e-05, + "loss": 0.6918, + "step": 205 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 1.1398804187774658, + "learning_rate": 2.8128767937992963e-05, + "loss": 0.6604, + "step": 210 + }, + { + "epoch": 0.6268221574344023, + "grad_norm": 1.1641172170639038, + "learning_rate": 2.8010431584174605e-05, + "loss": 0.7014, + "step": 215 + }, + { + "epoch": 0.641399416909621, + "grad_norm": 1.262837529182434, + "learning_rate": 2.788873218306298e-05, + "loss": 0.592, + "step": 220 + }, + { + "epoch": 0.6559766763848397, + "grad_norm": 1.2907779216766357, + "learning_rate": 2.7763701192556985e-05, + "loss": 0.643, + "step": 225 + }, + { + "epoch": 0.6705539358600583, + "grad_norm": 1.158223032951355, + "learning_rate": 2.7635370931733167e-05, + "loss": 0.6415, + "step": 230 + }, + { + "epoch": 0.685131195335277, + "grad_norm": 1.105440616607666, + "learning_rate": 2.750377457249159e-05, + "loss": 0.6181, + "step": 235 + }, + { + "epoch": 0.6997084548104956, + "grad_norm": 1.1043587923049927, + "learning_rate": 2.7368946130981286e-05, + "loss": 0.6112, + "step": 240 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 1.2348814010620117, + "learning_rate": 2.7230920458807473e-05, + "loss": 0.6367, + "step": 245 + }, + { + "epoch": 0.7288629737609329, + "grad_norm": 1.2944717407226562, + "learning_rate": 2.708973323402281e-05, + "loss": 0.6037, + "step": 250 + }, + { + "epoch": 0.7434402332361516, + "grad_norm": 1.1250150203704834, + "learning_rate": 2.6945420951905038e-05, + "loss": 0.6096, + "step": 255 + }, + { + "epoch": 0.7580174927113703, + "grad_norm": 1.1479696035385132, + "learning_rate": 2.6798020915523362e-05, + "loss": 0.5442, + "step": 260 + }, + { + "epoch": 0.7725947521865889, + "grad_norm": 1.3492239713668823, + "learning_rate": 2.6647571226096027e-05, + "loss": 0.5835, + "step": 265 + }, + { + "epoch": 0.7871720116618076, + "grad_norm": 1.232110619544983, + "learning_rate": 2.6494110773141627e-05, + "loss": 0.572, + "step": 270 + }, + { + "epoch": 0.8017492711370262, + "grad_norm": 1.207140564918518, + "learning_rate": 2.633767922442656e-05, + "loss": 0.621, + "step": 275 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 1.2151309251785278, + "learning_rate": 2.6178317015711397e-05, + "loss": 0.5612, + "step": 280 + }, + { + "epoch": 0.8309037900874635, + "grad_norm": 1.1312267780303955, + "learning_rate": 2.6016065340298706e-05, + "loss": 0.567, + "step": 285 + }, + { + "epoch": 0.8454810495626822, + "grad_norm": 1.3603981733322144, + "learning_rate": 2.585096613838502e-05, + "loss": 0.5706, + "step": 290 + }, + { + "epoch": 0.8600583090379009, + "grad_norm": 1.4238096475601196, + "learning_rate": 2.5683062086219837e-05, + "loss": 0.5325, + "step": 295 + }, + { + "epoch": 0.8746355685131195, + "grad_norm": 1.3225473165512085, + "learning_rate": 2.5512396585074234e-05, + "loss": 0.5371, + "step": 300 + }, + { + "epoch": 0.8892128279883382, + "grad_norm": 1.1898998022079468, + "learning_rate": 2.533901375002218e-05, + "loss": 0.5239, + "step": 305 + }, + { + "epoch": 0.9037900874635568, + "grad_norm": 1.1759053468704224, + "learning_rate": 2.516295839853728e-05, + "loss": 0.5131, + "step": 310 + }, + { + "epoch": 0.9183673469387755, + "grad_norm": 1.2333215475082397, + "learning_rate": 2.498427603890798e-05, + "loss": 0.5409, + "step": 315 + }, + { + "epoch": 0.9329446064139941, + "grad_norm": 1.2971348762512207, + "learning_rate": 2.480301285847418e-05, + "loss": 0.4814, + "step": 320 + }, + { + "epoch": 0.9475218658892128, + "grad_norm": 1.3949518203735352, + "learning_rate": 2.461921571168836e-05, + "loss": 0.4552, + "step": 325 + }, + { + "epoch": 0.9620991253644315, + "grad_norm": 1.219107985496521, + "learning_rate": 2.4432932108004214e-05, + "loss": 0.4323, + "step": 330 + }, + { + "epoch": 0.9766763848396501, + "grad_norm": 1.3206461668014526, + "learning_rate": 2.4244210199596038e-05, + "loss": 0.4694, + "step": 335 + }, + { + "epoch": 0.9912536443148688, + "grad_norm": 1.2505484819412231, + "learning_rate": 2.4053098768911905e-05, + "loss": 0.4045, + "step": 340 + }, + { + "epoch": 1.0058309037900874, + "grad_norm": 1.1753484010696411, + "learning_rate": 2.385964721606399e-05, + "loss": 0.4391, + "step": 345 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 1.2595179080963135, + "learning_rate": 2.3663905546059192e-05, + "loss": 0.4172, + "step": 350 + }, + { + "epoch": 1.0349854227405249, + "grad_norm": 1.3151198625564575, + "learning_rate": 2.3465924355873433e-05, + "loss": 0.3697, + "step": 355 + }, + { + "epoch": 1.0495626822157433, + "grad_norm": 1.204174280166626, + "learning_rate": 2.326575482137289e-05, + "loss": 0.3876, + "step": 360 + }, + { + "epoch": 1.064139941690962, + "grad_norm": 1.2161316871643066, + "learning_rate": 2.3063448684085653e-05, + "loss": 0.3657, + "step": 365 + }, + { + "epoch": 1.0787172011661808, + "grad_norm": 1.1754754781723022, + "learning_rate": 2.2859058237827114e-05, + "loss": 0.3254, + "step": 370 + }, + { + "epoch": 1.0932944606413995, + "grad_norm": 1.2916978597640991, + "learning_rate": 2.265263631518261e-05, + "loss": 0.3567, + "step": 375 + }, + { + "epoch": 1.1078717201166182, + "grad_norm": 1.4660751819610596, + "learning_rate": 2.2444236273850837e-05, + "loss": 0.3344, + "step": 380 + }, + { + "epoch": 1.1224489795918366, + "grad_norm": 1.1774286031723022, + "learning_rate": 2.2233911982851452e-05, + "loss": 0.3402, + "step": 385 + }, + { + "epoch": 1.1370262390670554, + "grad_norm": 1.316292643547058, + "learning_rate": 2.2021717808600602e-05, + "loss": 0.3885, + "step": 390 + }, + { + "epoch": 1.151603498542274, + "grad_norm": 1.2388097047805786, + "learning_rate": 2.1807708600857773e-05, + "loss": 0.3504, + "step": 395 + }, + { + "epoch": 1.1661807580174928, + "grad_norm": 1.3586658239364624, + "learning_rate": 2.15919396785478e-05, + "loss": 0.3064, + "step": 400 + }, + { + "epoch": 1.1807580174927113, + "grad_norm": 1.3989168405532837, + "learning_rate": 2.1374466815461536e-05, + "loss": 0.395, + "step": 405 + }, + { + "epoch": 1.19533527696793, + "grad_norm": 1.2700116634368896, + "learning_rate": 2.1155346225838994e-05, + "loss": 0.289, + "step": 410 + }, + { + "epoch": 1.2099125364431487, + "grad_norm": 1.2492783069610596, + "learning_rate": 2.0934634549838567e-05, + "loss": 0.2937, + "step": 415 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 1.41838538646698, + "learning_rate": 2.0712388838896236e-05, + "loss": 0.3094, + "step": 420 + }, + { + "epoch": 1.239067055393586, + "grad_norm": 1.1972510814666748, + "learning_rate": 2.0488666540978405e-05, + "loss": 0.3234, + "step": 425 + }, + { + "epoch": 1.2536443148688048, + "grad_norm": 1.4537353515625, + "learning_rate": 2.0263525485732285e-05, + "loss": 0.3136, + "step": 430 + }, + { + "epoch": 1.2682215743440233, + "grad_norm": 1.2058525085449219, + "learning_rate": 2.0037023869537567e-05, + "loss": 0.2957, + "step": 435 + }, + { + "epoch": 1.282798833819242, + "grad_norm": 1.3250120878219604, + "learning_rate": 1.980922024046333e-05, + "loss": 0.2697, + "step": 440 + }, + { + "epoch": 1.2973760932944607, + "grad_norm": 1.1300866603851318, + "learning_rate": 1.9580173483134066e-05, + "loss": 0.2828, + "step": 445 + }, + { + "epoch": 1.3119533527696792, + "grad_norm": 1.2814719676971436, + "learning_rate": 1.934994280350864e-05, + "loss": 0.2694, + "step": 450 + }, + { + "epoch": 1.3265306122448979, + "grad_norm": 1.1166484355926514, + "learning_rate": 1.9118587713576264e-05, + "loss": 0.3045, + "step": 455 + }, + { + "epoch": 1.3411078717201166, + "grad_norm": 1.2349168062210083, + "learning_rate": 1.8886168015973288e-05, + "loss": 0.2791, + "step": 460 + }, + { + "epoch": 1.3556851311953353, + "grad_norm": 1.2965843677520752, + "learning_rate": 1.8652743788524952e-05, + "loss": 0.3056, + "step": 465 + }, + { + "epoch": 1.370262390670554, + "grad_norm": 1.3584120273590088, + "learning_rate": 1.8418375368715927e-05, + "loss": 0.3055, + "step": 470 + }, + { + "epoch": 1.3848396501457727, + "grad_norm": 1.1526381969451904, + "learning_rate": 1.818312333809379e-05, + "loss": 0.27, + "step": 475 + }, + { + "epoch": 1.3994169096209912, + "grad_norm": 1.3759846687316895, + "learning_rate": 1.7947048506609346e-05, + "loss": 0.2522, + "step": 480 + }, + { + "epoch": 1.41399416909621, + "grad_norm": 1.2939045429229736, + "learning_rate": 1.7710211896898013e-05, + "loss": 0.265, + "step": 485 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 1.1933553218841553, + "learning_rate": 1.7472674728506124e-05, + "loss": 0.253, + "step": 490 + }, + { + "epoch": 1.4431486880466473, + "grad_norm": 1.3062920570373535, + "learning_rate": 1.72344984020664e-05, + "loss": 0.2425, + "step": 495 + }, + { + "epoch": 1.4577259475218658, + "grad_norm": 1.4010287523269653, + "learning_rate": 1.6995744483426586e-05, + "loss": 0.2347, + "step": 500 + }, + { + "epoch": 1.4723032069970845, + "grad_norm": 1.2163918018341064, + "learning_rate": 1.6756474687735368e-05, + "loss": 0.2471, + "step": 505 + }, + { + "epoch": 1.4868804664723032, + "grad_norm": 1.5639783143997192, + "learning_rate": 1.6516750863489755e-05, + "loss": 0.2167, + "step": 510 + }, + { + "epoch": 1.501457725947522, + "grad_norm": 1.1700334548950195, + "learning_rate": 1.627663497654791e-05, + "loss": 0.2198, + "step": 515 + }, + { + "epoch": 1.5160349854227406, + "grad_norm": 1.2296478748321533, + "learning_rate": 1.6036189094111707e-05, + "loss": 0.2219, + "step": 520 + }, + { + "epoch": 1.5306122448979593, + "grad_norm": 1.2432500123977661, + "learning_rate": 1.579547536868308e-05, + "loss": 0.2459, + "step": 525 + }, + { + "epoch": 1.5451895043731778, + "grad_norm": 1.2667638063430786, + "learning_rate": 1.5554556021998294e-05, + "loss": 0.2516, + "step": 530 + }, + { + "epoch": 1.5597667638483965, + "grad_norm": 1.212599754333496, + "learning_rate": 1.531349332894435e-05, + "loss": 0.2133, + "step": 535 + }, + { + "epoch": 1.574344023323615, + "grad_norm": 1.2042200565338135, + "learning_rate": 1.5072349601461645e-05, + "loss": 0.2494, + "step": 540 + }, + { + "epoch": 1.5889212827988337, + "grad_norm": 1.1065187454223633, + "learning_rate": 1.4831187172437047e-05, + "loss": 0.2223, + "step": 545 + }, + { + "epoch": 1.6034985422740524, + "grad_norm": 1.2919031381607056, + "learning_rate": 1.4590068379591544e-05, + "loss": 0.1702, + "step": 550 + }, + { + "epoch": 1.6180758017492711, + "grad_norm": 1.3987963199615479, + "learning_rate": 1.4349055549366696e-05, + "loss": 0.231, + "step": 555 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 1.2252508401870728, + "learning_rate": 1.4108210980813923e-05, + "loss": 0.2106, + "step": 560 + }, + { + "epoch": 1.6472303206997085, + "grad_norm": 1.2649791240692139, + "learning_rate": 1.3867596929490935e-05, + "loss": 0.1797, + "step": 565 + }, + { + "epoch": 1.6618075801749272, + "grad_norm": 1.0990451574325562, + "learning_rate": 1.3627275591369394e-05, + "loss": 0.1922, + "step": 570 + }, + { + "epoch": 1.6763848396501457, + "grad_norm": 1.2402002811431885, + "learning_rate": 1.3387309086757945e-05, + "loss": 0.1999, + "step": 575 + }, + { + "epoch": 1.6909620991253644, + "grad_norm": 1.3797203302383423, + "learning_rate": 1.3147759444244851e-05, + "loss": 0.1873, + "step": 580 + }, + { + "epoch": 1.7055393586005831, + "grad_norm": 1.1798473596572876, + "learning_rate": 1.2908688584664303e-05, + "loss": 0.1819, + "step": 585 + }, + { + "epoch": 1.7201166180758016, + "grad_norm": 1.2419990301132202, + "learning_rate": 1.2670158305090588e-05, + "loss": 0.1792, + "step": 590 + }, + { + "epoch": 1.7346938775510203, + "grad_norm": 1.224246859550476, + "learning_rate": 1.2432230262864278e-05, + "loss": 0.2006, + "step": 595 + }, + { + "epoch": 1.749271137026239, + "grad_norm": 1.2102625370025635, + "learning_rate": 1.2194965959654487e-05, + "loss": 0.1934, + "step": 600 + }, + { + "epoch": 1.7638483965014577, + "grad_norm": 1.1621501445770264, + "learning_rate": 1.1958426725561423e-05, + "loss": 0.1717, + "step": 605 + }, + { + "epoch": 1.7784256559766765, + "grad_norm": 1.1954578161239624, + "learning_rate": 1.1722673703263228e-05, + "loss": 0.1821, + "step": 610 + }, + { + "epoch": 1.7930029154518952, + "grad_norm": 1.1282013654708862, + "learning_rate": 1.1487767832211336e-05, + "loss": 0.1804, + "step": 615 + }, + { + "epoch": 1.8075801749271136, + "grad_norm": 1.1317360401153564, + "learning_rate": 1.125376983287829e-05, + "loss": 0.1791, + "step": 620 + }, + { + "epoch": 1.8221574344023324, + "grad_norm": 1.0159201622009277, + "learning_rate": 1.1020740191062205e-05, + "loss": 0.1398, + "step": 625 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 1.3254249095916748, + "learning_rate": 1.0788739142251905e-05, + "loss": 0.1624, + "step": 630 + }, + { + "epoch": 1.8513119533527695, + "grad_norm": 1.0603454113006592, + "learning_rate": 1.0557826656056703e-05, + "loss": 0.1362, + "step": 635 + }, + { + "epoch": 1.8658892128279883, + "grad_norm": 1.319517731666565, + "learning_rate": 1.032806242070499e-05, + "loss": 0.1802, + "step": 640 + }, + { + "epoch": 1.880466472303207, + "grad_norm": 1.0893645286560059, + "learning_rate": 1.0099505827615461e-05, + "loss": 0.1615, + "step": 645 + }, + { + "epoch": 1.8950437317784257, + "grad_norm": 1.0426321029663086, + "learning_rate": 9.872215956045165e-06, + "loss": 0.1668, + "step": 650 + }, + { + "epoch": 1.9096209912536444, + "grad_norm": 1.406479835510254, + "learning_rate": 9.646251557818154e-06, + "loss": 0.1475, + "step": 655 + }, + { + "epoch": 1.924198250728863, + "grad_norm": 0.9760271310806274, + "learning_rate": 9.42167104213885e-06, + "loss": 0.1386, + "step": 660 + }, + { + "epoch": 1.9387755102040818, + "grad_norm": 1.2772929668426514, + "learning_rate": 9.198532460493878e-06, + "loss": 0.1698, + "step": 665 + }, + { + "epoch": 1.9533527696793003, + "grad_norm": 1.1725008487701416, + "learning_rate": 8.976893491646494e-06, + "loss": 0.1467, + "step": 670 + }, + { + "epoch": 1.967930029154519, + "grad_norm": 1.0926477909088135, + "learning_rate": 8.756811426727236e-06, + "loss": 0.1321, + "step": 675 + }, + { + "epoch": 1.9825072886297375, + "grad_norm": 1.0477861166000366, + "learning_rate": 8.538343154424838e-06, + "loss": 0.1501, + "step": 680 + }, + { + "epoch": 1.9970845481049562, + "grad_norm": 1.1073461771011353, + "learning_rate": 8.321545146281162e-06, + "loss": 0.1396, + "step": 685 + }, + { + "epoch": 2.011661807580175, + "grad_norm": 0.9271028637886047, + "learning_rate": 8.106473442093946e-06, + "loss": 0.1248, + "step": 690 + }, + { + "epoch": 2.0262390670553936, + "grad_norm": 0.8741105794906616, + "learning_rate": 7.893183635431172e-06, + "loss": 0.1283, + "step": 695 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.9803028702735901, + "learning_rate": 7.681730859260739e-06, + "loss": 0.1067, + "step": 700 + }, + { + "epoch": 2.055393586005831, + "grad_norm": 0.8040251731872559, + "learning_rate": 7.472169771699238e-06, + "loss": 0.0951, + "step": 705 + }, + { + "epoch": 2.0699708454810497, + "grad_norm": 0.9432370066642761, + "learning_rate": 7.264554541883481e-06, + "loss": 0.1005, + "step": 710 + }, + { + "epoch": 2.0845481049562684, + "grad_norm": 1.0905773639678955, + "learning_rate": 7.058938835968361e-06, + "loss": 0.1148, + "step": 715 + }, + { + "epoch": 2.0991253644314867, + "grad_norm": 0.9996586441993713, + "learning_rate": 6.8553758032547935e-06, + "loss": 0.1129, + "step": 720 + }, + { + "epoch": 2.1137026239067054, + "grad_norm": 1.2049249410629272, + "learning_rate": 6.653918062451206e-06, + "loss": 0.1026, + "step": 725 + }, + { + "epoch": 2.128279883381924, + "grad_norm": 1.010697841644287, + "learning_rate": 6.454617688072235e-06, + "loss": 0.1194, + "step": 730 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 0.8009735345840454, + "learning_rate": 6.257526196978049e-06, + "loss": 0.1027, + "step": 735 + }, + { + "epoch": 2.1574344023323615, + "grad_norm": 1.2351713180541992, + "learning_rate": 6.062694535057829e-06, + "loss": 0.1065, + "step": 740 + }, + { + "epoch": 2.17201166180758, + "grad_norm": 0.99173903465271, + "learning_rate": 5.8701730640608354e-06, + "loss": 0.1052, + "step": 745 + }, + { + "epoch": 2.186588921282799, + "grad_norm": 1.088194727897644, + "learning_rate": 5.680011548578479e-06, + "loss": 0.1044, + "step": 750 + }, + { + "epoch": 2.2011661807580176, + "grad_norm": 0.94852215051651, + "learning_rate": 5.492259143180757e-06, + "loss": 0.0955, + "step": 755 + }, + { + "epoch": 2.2157434402332363, + "grad_norm": 0.8313064575195312, + "learning_rate": 5.306964379710347e-06, + "loss": 0.0939, + "step": 760 + }, + { + "epoch": 2.2303206997084546, + "grad_norm": 0.9789465069770813, + "learning_rate": 5.124175154737676e-06, + "loss": 0.0908, + "step": 765 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 1.2305395603179932, + "learning_rate": 4.9439387171802265e-06, + "loss": 0.1091, + "step": 770 + }, + { + "epoch": 2.259475218658892, + "grad_norm": 1.1985671520233154, + "learning_rate": 4.766301656089219e-06, + "loss": 0.1275, + "step": 775 + }, + { + "epoch": 2.2740524781341107, + "grad_norm": 0.8270859718322754, + "learning_rate": 4.591309888606886e-06, + "loss": 0.081, + "step": 780 + }, + { + "epoch": 2.2886297376093294, + "grad_norm": 1.0125635862350464, + "learning_rate": 4.419008648097402e-06, + "loss": 0.0979, + "step": 785 + }, + { + "epoch": 2.303206997084548, + "grad_norm": 0.9175333380699158, + "learning_rate": 4.249442472454586e-06, + "loss": 0.1023, + "step": 790 + }, + { + "epoch": 2.317784256559767, + "grad_norm": 0.761922299861908, + "learning_rate": 4.082655192589382e-06, + "loss": 0.0923, + "step": 795 + }, + { + "epoch": 2.3323615160349855, + "grad_norm": 0.8750976324081421, + "learning_rate": 3.918689921100051e-06, + "loss": 0.0794, + "step": 800 + }, + { + "epoch": 2.3469387755102042, + "grad_norm": 0.9664970636367798, + "learning_rate": 3.7575890411280733e-06, + "loss": 0.0921, + "step": 805 + }, + { + "epoch": 2.3615160349854225, + "grad_norm": 0.9252130389213562, + "learning_rate": 3.59939419540257e-06, + "loss": 0.1105, + "step": 810 + }, + { + "epoch": 2.376093294460641, + "grad_norm": 0.9141732454299927, + "learning_rate": 3.4441462754761655e-06, + "loss": 0.1025, + "step": 815 + }, + { + "epoch": 2.39067055393586, + "grad_norm": 1.0694290399551392, + "learning_rate": 3.291885411154973e-06, + "loss": 0.0857, + "step": 820 + }, + { + "epoch": 2.4052478134110786, + "grad_norm": 0.7673351168632507, + "learning_rate": 3.1426509601254955e-06, + "loss": 0.1008, + "step": 825 + }, + { + "epoch": 2.4198250728862973, + "grad_norm": 0.79085773229599, + "learning_rate": 2.9964814977811327e-06, + "loss": 0.0809, + "step": 830 + }, + { + "epoch": 2.434402332361516, + "grad_norm": 0.9044321775436401, + "learning_rate": 2.8534148072508637e-06, + "loss": 0.083, + "step": 835 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.7718706130981445, + "learning_rate": 2.713487869632762e-06, + "loss": 0.1013, + "step": 840 + }, + { + "epoch": 2.4635568513119535, + "grad_norm": 0.8005554676055908, + "learning_rate": 2.576736854434793e-06, + "loss": 0.0775, + "step": 845 + }, + { + "epoch": 2.478134110787172, + "grad_norm": 0.8394175171852112, + "learning_rate": 2.4431971102254176e-06, + "loss": 0.083, + "step": 850 + }, + { + "epoch": 2.4927113702623904, + "grad_norm": 0.6213698983192444, + "learning_rate": 2.312903155496382e-06, + "loss": 0.0843, + "step": 855 + }, + { + "epoch": 2.5072886297376096, + "grad_norm": 0.8491662740707397, + "learning_rate": 2.1858886697400916e-06, + "loss": 0.089, + "step": 860 + }, + { + "epoch": 2.521865889212828, + "grad_norm": 0.8924172520637512, + "learning_rate": 2.062186484743858e-06, + "loss": 0.0804, + "step": 865 + }, + { + "epoch": 2.5364431486880465, + "grad_norm": 0.7493453621864319, + "learning_rate": 1.9418285761032432e-06, + "loss": 0.0753, + "step": 870 + }, + { + "epoch": 2.5510204081632653, + "grad_norm": 0.7781217694282532, + "learning_rate": 1.8248460549567375e-06, + "loss": 0.0969, + "step": 875 + }, + { + "epoch": 2.565597667638484, + "grad_norm": 0.7646186351776123, + "learning_rate": 1.7112691599438963e-06, + "loss": 0.0903, + "step": 880 + }, + { + "epoch": 2.5801749271137027, + "grad_norm": 0.7781457901000977, + "learning_rate": 1.6011272493889879e-06, + "loss": 0.0839, + "step": 885 + }, + { + "epoch": 2.5947521865889214, + "grad_norm": 0.9612010717391968, + "learning_rate": 1.494448793712217e-06, + "loss": 0.0789, + "step": 890 + }, + { + "epoch": 2.60932944606414, + "grad_norm": 0.8082157373428345, + "learning_rate": 1.3912613680704284e-06, + "loss": 0.0839, + "step": 895 + }, + { + "epoch": 2.6239067055393583, + "grad_norm": 0.8923764824867249, + "learning_rate": 1.2915916452292598e-06, + "loss": 0.0912, + "step": 900 + }, + { + "epoch": 2.6384839650145775, + "grad_norm": 0.8558257818222046, + "learning_rate": 1.1954653886685356e-06, + "loss": 0.091, + "step": 905 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7514873147010803, + "learning_rate": 1.102907445922703e-06, + "loss": 0.0793, + "step": 910 + }, + { + "epoch": 2.6676384839650145, + "grad_norm": 0.9386159777641296, + "learning_rate": 1.0139417421580294e-06, + "loss": 0.0977, + "step": 915 + }, + { + "epoch": 2.682215743440233, + "grad_norm": 0.711701512336731, + "learning_rate": 9.285912739882118e-07, + "loss": 0.0771, + "step": 920 + }, + { + "epoch": 2.696793002915452, + "grad_norm": 0.6772379875183105, + "learning_rate": 8.468781035300305e-07, + "loss": 0.0799, + "step": 925 + }, + { + "epoch": 2.7113702623906706, + "grad_norm": 0.8089955449104309, + "learning_rate": 7.688233527005373e-07, + "loss": 0.0889, + "step": 930 + }, + { + "epoch": 2.7259475218658893, + "grad_norm": 0.6511778831481934, + "learning_rate": 6.944471977572919e-07, + "loss": 0.0837, + "step": 935 + }, + { + "epoch": 2.740524781341108, + "grad_norm": 0.7935739159584045, + "learning_rate": 6.237688640830019e-07, + "loss": 0.0997, + "step": 940 + }, + { + "epoch": 2.7551020408163263, + "grad_norm": 0.6699678301811218, + "learning_rate": 5.568066212160227e-07, + "loss": 0.0873, + "step": 945 + }, + { + "epoch": 2.7696793002915454, + "grad_norm": 0.6568289995193481, + "learning_rate": 4.935777781278655e-07, + "loss": 0.0773, + "step": 950 + }, + { + "epoch": 2.7842565597667637, + "grad_norm": 0.6719860434532166, + "learning_rate": 4.340986787490292e-07, + "loss": 0.0776, + "step": 955 + }, + { + "epoch": 2.7988338192419824, + "grad_norm": 0.658849835395813, + "learning_rate": 3.783846977442884e-07, + "loss": 0.0753, + "step": 960 + }, + { + "epoch": 2.813411078717201, + "grad_norm": 0.6978284120559692, + "learning_rate": 3.264502365385158e-07, + "loss": 0.0915, + "step": 965 + }, + { + "epoch": 2.82798833819242, + "grad_norm": 0.8883819580078125, + "learning_rate": 2.783087195940781e-07, + "loss": 0.0896, + "step": 970 + }, + { + "epoch": 2.8425655976676385, + "grad_norm": 0.6045119166374207, + "learning_rate": 2.339725909407703e-07, + "loss": 0.0672, + "step": 975 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.7241390943527222, + "learning_rate": 1.9345331095917184e-07, + "loss": 0.0772, + "step": 980 + }, + { + "epoch": 2.871720116618076, + "grad_norm": 0.6567550301551819, + "learning_rate": 1.5676135341826825e-07, + "loss": 0.0779, + "step": 985 + }, + { + "epoch": 2.8862973760932946, + "grad_norm": 0.5955002307891846, + "learning_rate": 1.2390620276811316e-07, + "loss": 0.084, + "step": 990 + }, + { + "epoch": 2.9008746355685133, + "grad_norm": 0.7174606919288635, + "learning_rate": 9.489635168819621e-08, + "loss": 0.0765, + "step": 995 + }, + { + "epoch": 2.9154518950437316, + "grad_norm": 0.7240132689476013, + "learning_rate": 6.973929889218578e-08, + "loss": 0.0905, + "step": 1000 + }, + { + "epoch": 2.9300291545189503, + "grad_norm": 0.810859739780426, + "learning_rate": 4.844154718959604e-08, + "loss": 0.091, + "step": 1005 + }, + { + "epoch": 2.944606413994169, + "grad_norm": 0.7027409672737122, + "learning_rate": 3.100860180489162e-08, + "loss": 0.0878, + "step": 1010 + }, + { + "epoch": 2.9591836734693877, + "grad_norm": 0.6288892030715942, + "learning_rate": 1.7444968954440922e-08, + "loss": 0.0806, + "step": 1015 + }, + { + "epoch": 2.9737609329446064, + "grad_norm": 0.6648575067520142, + "learning_rate": 7.754154681717807e-09, + "loss": 0.071, + "step": 1020 + }, + { + "epoch": 2.988338192419825, + "grad_norm": 0.9208803772926331, + "learning_rate": 1.938663951026598e-09, + "loss": 0.091, + "step": 1025 + }, + { + "epoch": 3.0, + "step": 1029, + "total_flos": 1.2584939702378824e+18, + "train_loss": 0.4064867097967684, + "train_runtime": 1021.6736, + "train_samples_per_second": 32.212, + "train_steps_per_second": 1.007 + } + ], + "logging_steps": 5, + "max_steps": 1029, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2584939702378824e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..750ec9c58758f8697cdcf70af888983d35197f7a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/12_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3c11f2669dc255311bc4f85e02e9ca4360e7f278187f9cd4b6424f2b34c596 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b487889cab9a50a7e41f915c6d53408f00926392 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 13_128_e3_3e-5 + results: [] +--- + + + +# 13_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8270f5c43f6288df7c5f7545a7907ed8840f847b --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "v_proj", + "up_proj", + "q_proj", + "down_proj", + "o_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c61da9d5e6a376f70f099a5d93c413e9570d6077 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:175649f776ada914b7b4cfd59c2a88408707e50d076c7c02f34081a365283a5d +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a3f98742849de151a22dcbee16ec44d9e8ef927d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.424766610454872e+18, + "train_loss": 0.41283174582813803, + "train_runtime": 1143.5346, + "train_samples": 11898, + "train_samples_per_second": 31.214, + "train_steps_per_second": 0.976 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a3f98742849de151a22dcbee16ec44d9e8ef927d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.424766610454872e+18, + "train_loss": 0.41283174582813803, + "train_runtime": 1143.5346, + "train_samples": 11898, + "train_samples_per_second": 31.214, + "train_steps_per_second": 0.976 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..17dadad52295da74e9f85e0c3d90d1bdd2f3bc8e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1604 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1116, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013440860215053764, + "grad_norm": 0.6008284091949463, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.5325, + "step": 5 + }, + { + "epoch": 0.026881720430107527, + "grad_norm": 0.6472765803337097, + "learning_rate": 4.821428571428572e-06, + "loss": 1.5841, + "step": 10 + }, + { + "epoch": 0.04032258064516129, + "grad_norm": 0.5214754343032837, + "learning_rate": 7.5e-06, + "loss": 1.527, + "step": 15 + }, + { + "epoch": 0.053763440860215055, + "grad_norm": 0.5769675374031067, + "learning_rate": 1.0178571428571429e-05, + "loss": 1.4608, + "step": 20 + }, + { + "epoch": 0.06720430107526881, + "grad_norm": 0.5845513343811035, + "learning_rate": 1.2857142857142857e-05, + "loss": 1.571, + "step": 25 + }, + { + "epoch": 0.08064516129032258, + "grad_norm": 0.4964092969894409, + "learning_rate": 1.553571428571429e-05, + "loss": 1.4644, + "step": 30 + }, + { + "epoch": 0.09408602150537634, + "grad_norm": 0.5321637392044067, + "learning_rate": 1.8214285714285712e-05, + "loss": 1.4289, + "step": 35 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 0.47858643531799316, + "learning_rate": 2.089285714285714e-05, + "loss": 1.4581, + "step": 40 + }, + { + "epoch": 0.12096774193548387, + "grad_norm": 0.5123903155326843, + "learning_rate": 2.357142857142857e-05, + "loss": 1.3781, + "step": 45 + }, + { + "epoch": 0.13440860215053763, + "grad_norm": 0.5339639186859131, + "learning_rate": 2.625e-05, + "loss": 1.4028, + "step": 50 + }, + { + "epoch": 0.1478494623655914, + "grad_norm": 0.5830047726631165, + "learning_rate": 2.892857142857143e-05, + "loss": 1.4043, + "step": 55 + }, + { + "epoch": 0.16129032258064516, + "grad_norm": 0.5763454437255859, + "learning_rate": 2.9999407089793328e-05, + "loss": 1.3331, + "step": 60 + }, + { + "epoch": 0.17473118279569894, + "grad_norm": 0.6084280610084534, + "learning_rate": 2.99957839193835e-05, + "loss": 1.3514, + "step": 65 + }, + { + "epoch": 0.1881720430107527, + "grad_norm": 0.6324818730354309, + "learning_rate": 2.9988867767784412e-05, + "loss": 1.2725, + "step": 70 + }, + { + "epoch": 0.20161290322580644, + "grad_norm": 0.6717544794082642, + "learning_rate": 2.997866015374007e-05, + "loss": 1.3169, + "step": 75 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 0.5896978378295898, + "learning_rate": 2.996516331877925e-05, + "loss": 1.2023, + "step": 80 + }, + { + "epoch": 0.22849462365591397, + "grad_norm": 0.6889088749885559, + "learning_rate": 2.994838022672324e-05, + "loss": 1.1677, + "step": 85 + }, + { + "epoch": 0.24193548387096775, + "grad_norm": 0.6349788308143616, + "learning_rate": 2.9928314563035015e-05, + "loss": 1.1895, + "step": 90 + }, + { + "epoch": 0.2553763440860215, + "grad_norm": 0.6352388858795166, + "learning_rate": 2.9904970734009933e-05, + "loss": 1.1704, + "step": 95 + }, + { + "epoch": 0.26881720430107525, + "grad_norm": 0.745246946811676, + "learning_rate": 2.9878353865808144e-05, + "loss": 1.1625, + "step": 100 + }, + { + "epoch": 0.28225806451612906, + "grad_norm": 0.726783275604248, + "learning_rate": 2.9848469803328896e-05, + "loss": 1.1308, + "step": 105 + }, + { + "epoch": 0.2956989247311828, + "grad_norm": 0.8427244424819946, + "learning_rate": 2.981532510892707e-05, + "loss": 1.1451, + "step": 110 + }, + { + "epoch": 0.30913978494623656, + "grad_norm": 0.7218514084815979, + "learning_rate": 2.9778927060972075e-05, + "loss": 1.0301, + "step": 115 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.8247747421264648, + "learning_rate": 2.9739283652249625e-05, + "loss": 1.0579, + "step": 120 + }, + { + "epoch": 0.33602150537634407, + "grad_norm": 0.903286874294281, + "learning_rate": 2.9696403588206517e-05, + "loss": 1.0078, + "step": 125 + }, + { + "epoch": 0.34946236559139787, + "grad_norm": 0.9745504856109619, + "learning_rate": 2.9650296285038996e-05, + "loss": 1.0109, + "step": 130 + }, + { + "epoch": 0.3629032258064516, + "grad_norm": 0.8701937198638916, + "learning_rate": 2.9600971867625027e-05, + "loss": 1.0008, + "step": 135 + }, + { + "epoch": 0.3763440860215054, + "grad_norm": 0.87418532371521, + "learning_rate": 2.95484411673009e-05, + "loss": 0.987, + "step": 140 + }, + { + "epoch": 0.3897849462365591, + "grad_norm": 0.9259530305862427, + "learning_rate": 2.9492715719482776e-05, + "loss": 0.9356, + "step": 145 + }, + { + "epoch": 0.4032258064516129, + "grad_norm": 0.8499327301979065, + "learning_rate": 2.9433807761133542e-05, + "loss": 0.9607, + "step": 150 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.9770460724830627, + "learning_rate": 2.9371730228075684e-05, + "loss": 0.9213, + "step": 155 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 0.9117967486381531, + "learning_rate": 2.9306496752150638e-05, + "loss": 0.8848, + "step": 160 + }, + { + "epoch": 0.4435483870967742, + "grad_norm": 1.0313373804092407, + "learning_rate": 2.9238121658225337e-05, + "loss": 0.8954, + "step": 165 + }, + { + "epoch": 0.45698924731182794, + "grad_norm": 0.9871340394020081, + "learning_rate": 2.9166619961046545e-05, + "loss": 0.9263, + "step": 170 + }, + { + "epoch": 0.47043010752688175, + "grad_norm": 0.9289008975028992, + "learning_rate": 2.909200736194372e-05, + "loss": 0.8386, + "step": 175 + }, + { + "epoch": 0.4838709677419355, + "grad_norm": 0.9819604158401489, + "learning_rate": 2.9014300245381095e-05, + "loss": 0.8262, + "step": 180 + }, + { + "epoch": 0.49731182795698925, + "grad_norm": 1.1638084650039673, + "learning_rate": 2.8933515675359766e-05, + "loss": 0.7958, + "step": 185 + }, + { + "epoch": 0.510752688172043, + "grad_norm": 0.9984620809555054, + "learning_rate": 2.8849671391670518e-05, + "loss": 0.8038, + "step": 190 + }, + { + "epoch": 0.5241935483870968, + "grad_norm": 1.1006667613983154, + "learning_rate": 2.8762785805998295e-05, + "loss": 0.7192, + "step": 195 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 1.2019890546798706, + "learning_rate": 2.86728779978791e-05, + "loss": 0.8133, + "step": 200 + }, + { + "epoch": 0.5510752688172043, + "grad_norm": 0.9955887198448181, + "learning_rate": 2.8579967710510264e-05, + "loss": 0.7823, + "step": 205 + }, + { + "epoch": 0.5645161290322581, + "grad_norm": 1.094082236289978, + "learning_rate": 2.8484075346414936e-05, + "loss": 0.7233, + "step": 210 + }, + { + "epoch": 0.5779569892473119, + "grad_norm": 1.139585256576538, + "learning_rate": 2.838522196296182e-05, + "loss": 0.705, + "step": 215 + }, + { + "epoch": 0.5913978494623656, + "grad_norm": 1.2508670091629028, + "learning_rate": 2.8283429267741138e-05, + "loss": 0.7279, + "step": 220 + }, + { + "epoch": 0.6048387096774194, + "grad_norm": 1.2799952030181885, + "learning_rate": 2.8178719613797747e-05, + "loss": 0.6905, + "step": 225 + }, + { + "epoch": 0.6182795698924731, + "grad_norm": 1.1042182445526123, + "learning_rate": 2.807111599472254e-05, + "loss": 0.7179, + "step": 230 + }, + { + "epoch": 0.6317204301075269, + "grad_norm": 1.1866369247436523, + "learning_rate": 2.7960642039603235e-05, + "loss": 0.6574, + "step": 235 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 1.089440107345581, + "learning_rate": 2.7847322007835546e-05, + "loss": 0.7065, + "step": 240 + }, + { + "epoch": 0.6586021505376344, + "grad_norm": 1.2733837366104126, + "learning_rate": 2.773118078379597e-05, + "loss": 0.6818, + "step": 245 + }, + { + "epoch": 0.6720430107526881, + "grad_norm": 1.1085740327835083, + "learning_rate": 2.7612243871377342e-05, + "loss": 0.6663, + "step": 250 + }, + { + "epoch": 0.6854838709677419, + "grad_norm": 1.0102565288543701, + "learning_rate": 2.749053738838834e-05, + "loss": 0.6723, + "step": 255 + }, + { + "epoch": 0.6989247311827957, + "grad_norm": 1.3682451248168945, + "learning_rate": 2.7366088060818154e-05, + "loss": 0.5917, + "step": 260 + }, + { + "epoch": 0.7123655913978495, + "grad_norm": 1.106176495552063, + "learning_rate": 2.7238923216967666e-05, + "loss": 0.67, + "step": 265 + }, + { + "epoch": 0.7258064516129032, + "grad_norm": 1.410741925239563, + "learning_rate": 2.7109070781448283e-05, + "loss": 0.6059, + "step": 270 + }, + { + "epoch": 0.739247311827957, + "grad_norm": 1.1598131656646729, + "learning_rate": 2.6976559269049875e-05, + "loss": 0.6416, + "step": 275 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 1.0877230167388916, + "learning_rate": 2.6841417778479132e-05, + "loss": 0.5918, + "step": 280 + }, + { + "epoch": 0.7661290322580645, + "grad_norm": 1.3181958198547363, + "learning_rate": 2.670367598596963e-05, + "loss": 0.5713, + "step": 285 + }, + { + "epoch": 0.7795698924731183, + "grad_norm": 1.1894084215164185, + "learning_rate": 2.6563364138765137e-05, + "loss": 0.6468, + "step": 290 + }, + { + "epoch": 0.793010752688172, + "grad_norm": 1.0916457176208496, + "learning_rate": 2.6420513048477503e-05, + "loss": 0.5703, + "step": 295 + }, + { + "epoch": 0.8064516129032258, + "grad_norm": 1.2529994249343872, + "learning_rate": 2.6275154084320622e-05, + "loss": 0.5595, + "step": 300 + }, + { + "epoch": 0.8198924731182796, + "grad_norm": 1.2032849788665771, + "learning_rate": 2.6127319166221923e-05, + "loss": 0.5449, + "step": 305 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 1.4868087768554688, + "learning_rate": 2.5977040757812997e-05, + "loss": 0.5841, + "step": 310 + }, + { + "epoch": 0.8467741935483871, + "grad_norm": 1.2388821840286255, + "learning_rate": 2.5824351859300748e-05, + "loss": 0.5268, + "step": 315 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.139684796333313, + "learning_rate": 2.5669286000220757e-05, + "loss": 0.5257, + "step": 320 + }, + { + "epoch": 0.8736559139784946, + "grad_norm": 1.4489539861679077, + "learning_rate": 2.551187723207443e-05, + "loss": 0.502, + "step": 325 + }, + { + "epoch": 0.8870967741935484, + "grad_norm": 1.3212213516235352, + "learning_rate": 2.5352160120851464e-05, + "loss": 0.4713, + "step": 330 + }, + { + "epoch": 0.9005376344086021, + "grad_norm": 1.258407473564148, + "learning_rate": 2.519016973943939e-05, + "loss": 0.523, + "step": 335 + }, + { + "epoch": 0.9139784946236559, + "grad_norm": 1.2971725463867188, + "learning_rate": 2.5025941659921783e-05, + "loss": 0.4894, + "step": 340 + }, + { + "epoch": 0.9274193548387096, + "grad_norm": 1.1608957052230835, + "learning_rate": 2.485951194576685e-05, + "loss": 0.4951, + "step": 345 + }, + { + "epoch": 0.9408602150537635, + "grad_norm": 1.5126407146453857, + "learning_rate": 2.469091714390811e-05, + "loss": 0.4649, + "step": 350 + }, + { + "epoch": 0.9543010752688172, + "grad_norm": 1.378111720085144, + "learning_rate": 2.4520194276718938e-05, + "loss": 0.5215, + "step": 355 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 1.5382301807403564, + "learning_rate": 2.4347380833882634e-05, + "loss": 0.47, + "step": 360 + }, + { + "epoch": 0.9811827956989247, + "grad_norm": 1.1426136493682861, + "learning_rate": 2.417251476415998e-05, + "loss": 0.4592, + "step": 365 + }, + { + "epoch": 0.9946236559139785, + "grad_norm": 1.3683301210403442, + "learning_rate": 2.3995634467055882e-05, + "loss": 0.4857, + "step": 370 + }, + { + "epoch": 1.0080645161290323, + "grad_norm": 1.2963612079620361, + "learning_rate": 2.3816778784387097e-05, + "loss": 0.4247, + "step": 375 + }, + { + "epoch": 1.021505376344086, + "grad_norm": 1.31647789478302, + "learning_rate": 2.363598699175281e-05, + "loss": 0.4016, + "step": 380 + }, + { + "epoch": 1.0349462365591398, + "grad_norm": 1.1075716018676758, + "learning_rate": 2.3453298789909935e-05, + "loss": 0.3607, + "step": 385 + }, + { + "epoch": 1.0483870967741935, + "grad_norm": 1.1986656188964844, + "learning_rate": 2.3268754296055122e-05, + "loss": 0.3503, + "step": 390 + }, + { + "epoch": 1.0618279569892473, + "grad_norm": 1.3048341274261475, + "learning_rate": 2.3082394035015212e-05, + "loss": 0.4035, + "step": 395 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 1.228111743927002, + "learning_rate": 2.2894258930348284e-05, + "loss": 0.4219, + "step": 400 + }, + { + "epoch": 1.0887096774193548, + "grad_norm": 1.22667396068573, + "learning_rate": 2.2704390295357056e-05, + "loss": 0.3344, + "step": 405 + }, + { + "epoch": 1.1021505376344085, + "grad_norm": 1.319088101387024, + "learning_rate": 2.2512829824016773e-05, + "loss": 0.3507, + "step": 410 + }, + { + "epoch": 1.1155913978494623, + "grad_norm": 1.2292954921722412, + "learning_rate": 2.2319619581819458e-05, + "loss": 0.3677, + "step": 415 + }, + { + "epoch": 1.129032258064516, + "grad_norm": 1.2836815118789673, + "learning_rate": 2.2124801996536575e-05, + "loss": 0.3708, + "step": 420 + }, + { + "epoch": 1.14247311827957, + "grad_norm": 1.1228876113891602, + "learning_rate": 2.1928419848902163e-05, + "loss": 0.3467, + "step": 425 + }, + { + "epoch": 1.1559139784946237, + "grad_norm": 1.2151010036468506, + "learning_rate": 2.1730516263218472e-05, + "loss": 0.3149, + "step": 430 + }, + { + "epoch": 1.1693548387096775, + "grad_norm": 1.2808995246887207, + "learning_rate": 2.1531134697886103e-05, + "loss": 0.3341, + "step": 435 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 1.2367382049560547, + "learning_rate": 2.133031893586083e-05, + "loss": 0.3075, + "step": 440 + }, + { + "epoch": 1.196236559139785, + "grad_norm": 1.460054874420166, + "learning_rate": 2.1128113075039124e-05, + "loss": 0.3223, + "step": 445 + }, + { + "epoch": 1.2096774193548387, + "grad_norm": 1.3640908002853394, + "learning_rate": 2.092456151857455e-05, + "loss": 0.3143, + "step": 450 + }, + { + "epoch": 1.2231182795698925, + "grad_norm": 1.5188894271850586, + "learning_rate": 2.0719708965127073e-05, + "loss": 0.3722, + "step": 455 + }, + { + "epoch": 1.2365591397849462, + "grad_norm": 1.235180377960205, + "learning_rate": 2.0513600399047545e-05, + "loss": 0.312, + "step": 460 + }, + { + "epoch": 1.25, + "grad_norm": 1.312329649925232, + "learning_rate": 2.0306281080499413e-05, + "loss": 0.3184, + "step": 465 + }, + { + "epoch": 1.2634408602150538, + "grad_norm": 1.4068833589553833, + "learning_rate": 2.009779653551983e-05, + "loss": 0.2786, + "step": 470 + }, + { + "epoch": 1.2768817204301075, + "grad_norm": 1.411375880241394, + "learning_rate": 1.9888192546022488e-05, + "loss": 0.3099, + "step": 475 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 1.1644437313079834, + "learning_rate": 1.9677515139744126e-05, + "loss": 0.2726, + "step": 480 + }, + { + "epoch": 1.303763440860215, + "grad_norm": 1.554611086845398, + "learning_rate": 1.946581058013717e-05, + "loss": 0.2896, + "step": 485 + }, + { + "epoch": 1.3172043010752688, + "grad_norm": 1.1547613143920898, + "learning_rate": 1.9253125356210547e-05, + "loss": 0.2567, + "step": 490 + }, + { + "epoch": 1.3306451612903225, + "grad_norm": 1.4641879796981812, + "learning_rate": 1.903950617232098e-05, + "loss": 0.2824, + "step": 495 + }, + { + "epoch": 1.3440860215053765, + "grad_norm": 1.273492693901062, + "learning_rate": 1.8824999937917025e-05, + "loss": 0.2844, + "step": 500 + }, + { + "epoch": 1.35752688172043, + "grad_norm": 1.272354006767273, + "learning_rate": 1.8609653757238026e-05, + "loss": 0.2495, + "step": 505 + }, + { + "epoch": 1.370967741935484, + "grad_norm": 1.1801941394805908, + "learning_rate": 1.8393514918970315e-05, + "loss": 0.2533, + "step": 510 + }, + { + "epoch": 1.3844086021505375, + "grad_norm": 1.213531732559204, + "learning_rate": 1.8176630885862927e-05, + "loss": 0.2566, + "step": 515 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 1.3281023502349854, + "learning_rate": 1.7959049284305056e-05, + "loss": 0.2844, + "step": 520 + }, + { + "epoch": 1.4112903225806452, + "grad_norm": 1.3111432790756226, + "learning_rate": 1.7740817893867613e-05, + "loss": 0.2776, + "step": 525 + }, + { + "epoch": 1.424731182795699, + "grad_norm": 1.2366544008255005, + "learning_rate": 1.752198463681111e-05, + "loss": 0.2593, + "step": 530 + }, + { + "epoch": 1.4381720430107527, + "grad_norm": 1.3541221618652344, + "learning_rate": 1.7302597567562237e-05, + "loss": 0.2466, + "step": 535 + }, + { + "epoch": 1.4516129032258065, + "grad_norm": 1.1326907873153687, + "learning_rate": 1.7082704862161422e-05, + "loss": 0.2802, + "step": 540 + }, + { + "epoch": 1.4650537634408602, + "grad_norm": 1.1728899478912354, + "learning_rate": 1.6862354807683643e-05, + "loss": 0.2542, + "step": 545 + }, + { + "epoch": 1.478494623655914, + "grad_norm": 1.1884706020355225, + "learning_rate": 1.66415957916349e-05, + "loss": 0.2319, + "step": 550 + }, + { + "epoch": 1.4919354838709677, + "grad_norm": 1.239565134048462, + "learning_rate": 1.642047629132663e-05, + "loss": 0.2612, + "step": 555 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 1.3990916013717651, + "learning_rate": 1.619904486323037e-05, + "loss": 0.2351, + "step": 560 + }, + { + "epoch": 1.5188172043010753, + "grad_norm": 1.3745218515396118, + "learning_rate": 1.597735013231507e-05, + "loss": 0.2443, + "step": 565 + }, + { + "epoch": 1.532258064516129, + "grad_norm": 1.2794185876846313, + "learning_rate": 1.5755440781369345e-05, + "loss": 0.2591, + "step": 570 + }, + { + "epoch": 1.5456989247311828, + "grad_norm": 1.216627597808838, + "learning_rate": 1.5533365540311038e-05, + "loss": 0.245, + "step": 575 + }, + { + "epoch": 1.5591397849462365, + "grad_norm": 1.157889723777771, + "learning_rate": 1.531117317548643e-05, + "loss": 0.218, + "step": 580 + }, + { + "epoch": 1.5725806451612905, + "grad_norm": 1.2594391107559204, + "learning_rate": 1.5088912478961458e-05, + "loss": 0.2506, + "step": 585 + }, + { + "epoch": 1.586021505376344, + "grad_norm": 1.058353304862976, + "learning_rate": 1.4866632257807278e-05, + "loss": 0.2303, + "step": 590 + }, + { + "epoch": 1.599462365591398, + "grad_norm": 1.308736801147461, + "learning_rate": 1.4644381323382539e-05, + "loss": 0.1935, + "step": 595 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 1.087976336479187, + "learning_rate": 1.4422208480614684e-05, + "loss": 0.2064, + "step": 600 + }, + { + "epoch": 1.6263440860215055, + "grad_norm": 1.2073308229446411, + "learning_rate": 1.4200162517282736e-05, + "loss": 0.1974, + "step": 605 + }, + { + "epoch": 1.639784946236559, + "grad_norm": 1.2105491161346436, + "learning_rate": 1.3978292193303768e-05, + "loss": 0.2112, + "step": 610 + }, + { + "epoch": 1.653225806451613, + "grad_norm": 1.2713239192962646, + "learning_rate": 1.3756646230025555e-05, + "loss": 0.1904, + "step": 615 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 1.2941814661026, + "learning_rate": 1.3535273299527622e-05, + "loss": 0.2, + "step": 620 + }, + { + "epoch": 1.6801075268817205, + "grad_norm": 1.2193764448165894, + "learning_rate": 1.3314222013933218e-05, + "loss": 0.2017, + "step": 625 + }, + { + "epoch": 1.6935483870967742, + "grad_norm": 1.2606887817382812, + "learning_rate": 1.3093540914734351e-05, + "loss": 0.201, + "step": 630 + }, + { + "epoch": 1.706989247311828, + "grad_norm": 1.3117613792419434, + "learning_rate": 1.2873278462132401e-05, + "loss": 0.2071, + "step": 635 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 1.45053231716156, + "learning_rate": 1.2653483024396535e-05, + "loss": 0.1916, + "step": 640 + }, + { + "epoch": 1.7338709677419355, + "grad_norm": 1.3497095108032227, + "learning_rate": 1.2434202867242372e-05, + "loss": 0.1897, + "step": 645 + }, + { + "epoch": 1.7473118279569892, + "grad_norm": 1.1170185804367065, + "learning_rate": 1.221548614323308e-05, + "loss": 0.1816, + "step": 650 + }, + { + "epoch": 1.760752688172043, + "grad_norm": 1.1701183319091797, + "learning_rate": 1.1997380881205431e-05, + "loss": 0.1776, + "step": 655 + }, + { + "epoch": 1.7741935483870968, + "grad_norm": 1.2451260089874268, + "learning_rate": 1.1779934975722919e-05, + "loss": 0.1936, + "step": 660 + }, + { + "epoch": 1.7876344086021505, + "grad_norm": 1.1680057048797607, + "learning_rate": 1.1563196176558436e-05, + "loss": 0.2076, + "step": 665 + }, + { + "epoch": 1.8010752688172043, + "grad_norm": 1.1820694208145142, + "learning_rate": 1.13472120782087e-05, + "loss": 0.2127, + "step": 670 + }, + { + "epoch": 1.814516129032258, + "grad_norm": 1.1516228914260864, + "learning_rate": 1.1132030109442823e-05, + "loss": 0.2033, + "step": 675 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 1.050202488899231, + "learning_rate": 1.0917697522887217e-05, + "loss": 0.1508, + "step": 680 + }, + { + "epoch": 1.8413978494623655, + "grad_norm": 1.050594687461853, + "learning_rate": 1.0704261384649242e-05, + "loss": 0.1547, + "step": 685 + }, + { + "epoch": 1.8548387096774195, + "grad_norm": 1.2334657907485962, + "learning_rate": 1.0491768563981747e-05, + "loss": 0.1835, + "step": 690 + }, + { + "epoch": 1.868279569892473, + "grad_norm": 1.1029545068740845, + "learning_rate": 1.0280265722990908e-05, + "loss": 0.1424, + "step": 695 + }, + { + "epoch": 1.881720430107527, + "grad_norm": 1.0886962413787842, + "learning_rate": 1.0069799306389485e-05, + "loss": 0.1737, + "step": 700 + }, + { + "epoch": 1.8951612903225805, + "grad_norm": 1.0708134174346924, + "learning_rate": 9.860415531297881e-06, + "loss": 0.1601, + "step": 705 + }, + { + "epoch": 1.9086021505376345, + "grad_norm": 1.2164509296417236, + "learning_rate": 9.652160377095124e-06, + "loss": 0.175, + "step": 710 + }, + { + "epoch": 1.922043010752688, + "grad_norm": 1.1339483261108398, + "learning_rate": 9.445079575322101e-06, + "loss": 0.1627, + "step": 715 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 1.1148234605789185, + "learning_rate": 9.239218599639171e-06, + "loss": 0.1299, + "step": 720 + }, + { + "epoch": 1.9489247311827957, + "grad_norm": 0.9711194634437561, + "learning_rate": 9.03462265584046e-06, + "loss": 0.1295, + "step": 725 + }, + { + "epoch": 1.9623655913978495, + "grad_norm": 0.9990847706794739, + "learning_rate": 8.831336671926924e-06, + "loss": 0.1621, + "step": 730 + }, + { + "epoch": 1.9758064516129032, + "grad_norm": 1.3155736923217773, + "learning_rate": 8.629405288240461e-06, + "loss": 0.1326, + "step": 735 + }, + { + "epoch": 1.989247311827957, + "grad_norm": 0.8848444819450378, + "learning_rate": 8.428872847661139e-06, + "loss": 0.1718, + "step": 740 + }, + { + "epoch": 2.002688172043011, + "grad_norm": 1.0111210346221924, + "learning_rate": 8.229783385869807e-06, + "loss": 0.1582, + "step": 745 + }, + { + "epoch": 2.0161290322580645, + "grad_norm": 1.1479281187057495, + "learning_rate": 8.03218062167811e-06, + "loss": 0.1208, + "step": 750 + }, + { + "epoch": 2.0295698924731185, + "grad_norm": 1.0084205865859985, + "learning_rate": 7.836107947428115e-06, + "loss": 0.1167, + "step": 755 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.9987494945526123, + "learning_rate": 7.641608419463621e-06, + "loss": 0.1101, + "step": 760 + }, + { + "epoch": 2.056451612903226, + "grad_norm": 1.1359658241271973, + "learning_rate": 7.448724748675273e-06, + "loss": 0.1067, + "step": 765 + }, + { + "epoch": 2.0698924731182795, + "grad_norm": 1.0104074478149414, + "learning_rate": 7.257499291121473e-06, + "loss": 0.1288, + "step": 770 + }, + { + "epoch": 2.0833333333333335, + "grad_norm": 1.0295300483703613, + "learning_rate": 7.0679740387272896e-06, + "loss": 0.1169, + "step": 775 + }, + { + "epoch": 2.096774193548387, + "grad_norm": 0.8721526265144348, + "learning_rate": 6.880190610063272e-06, + "loss": 0.1103, + "step": 780 + }, + { + "epoch": 2.110215053763441, + "grad_norm": 0.965764045715332, + "learning_rate": 6.694190241206277e-06, + "loss": 0.1153, + "step": 785 + }, + { + "epoch": 2.1236559139784945, + "grad_norm": 0.9057422280311584, + "learning_rate": 6.510013776684281e-06, + "loss": 0.1378, + "step": 790 + }, + { + "epoch": 2.1370967741935485, + "grad_norm": 0.9549461603164673, + "learning_rate": 6.327701660507191e-06, + "loss": 0.0972, + "step": 795 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 1.0338079929351807, + "learning_rate": 6.147293927285537e-06, + "loss": 0.0944, + "step": 800 + }, + { + "epoch": 2.163978494623656, + "grad_norm": 0.9158647060394287, + "learning_rate": 5.968830193439195e-06, + "loss": 0.1002, + "step": 805 + }, + { + "epoch": 2.1774193548387095, + "grad_norm": 1.0119085311889648, + "learning_rate": 5.7923496484978115e-06, + "loss": 0.0886, + "step": 810 + }, + { + "epoch": 2.1908602150537635, + "grad_norm": 0.8685096502304077, + "learning_rate": 5.617891046495046e-06, + "loss": 0.1064, + "step": 815 + }, + { + "epoch": 2.204301075268817, + "grad_norm": 0.9151516556739807, + "learning_rate": 5.44549269745842e-06, + "loss": 0.0943, + "step": 820 + }, + { + "epoch": 2.217741935483871, + "grad_norm": 1.0354827642440796, + "learning_rate": 5.275192458996682e-06, + "loss": 0.1069, + "step": 825 + }, + { + "epoch": 2.2311827956989245, + "grad_norm": 0.9947547912597656, + "learning_rate": 5.1070277279864926e-06, + "loss": 0.0938, + "step": 830 + }, + { + "epoch": 2.2446236559139785, + "grad_norm": 0.9470123052597046, + "learning_rate": 4.941035432360333e-06, + "loss": 0.0968, + "step": 835 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.960735559463501, + "learning_rate": 4.777252022997327e-06, + "loss": 0.1065, + "step": 840 + }, + { + "epoch": 2.271505376344086, + "grad_norm": 0.9803473353385925, + "learning_rate": 4.615713465718867e-06, + "loss": 0.1055, + "step": 845 + }, + { + "epoch": 2.28494623655914, + "grad_norm": 1.034049153327942, + "learning_rate": 4.456455233390751e-06, + "loss": 0.0842, + "step": 850 + }, + { + "epoch": 2.2983870967741935, + "grad_norm": 0.7745009064674377, + "learning_rate": 4.299512298133546e-06, + "loss": 0.0947, + "step": 855 + }, + { + "epoch": 2.3118279569892475, + "grad_norm": 0.7869125604629517, + "learning_rate": 4.144919123642947e-06, + "loss": 0.0991, + "step": 860 + }, + { + "epoch": 2.325268817204301, + "grad_norm": 0.9363027811050415, + "learning_rate": 3.992709657621739e-06, + "loss": 0.1015, + "step": 865 + }, + { + "epoch": 2.338709677419355, + "grad_norm": 0.9664344191551208, + "learning_rate": 3.842917324325107e-06, + "loss": 0.1057, + "step": 870 + }, + { + "epoch": 2.3521505376344085, + "grad_norm": 1.0178738832473755, + "learning_rate": 3.6955750172208763e-06, + "loss": 0.1038, + "step": 875 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 0.9806485176086426, + "learning_rate": 3.550715091766307e-06, + "loss": 0.0984, + "step": 880 + }, + { + "epoch": 2.379032258064516, + "grad_norm": 0.8055049777030945, + "learning_rate": 3.4083693583030306e-06, + "loss": 0.1018, + "step": 885 + }, + { + "epoch": 2.39247311827957, + "grad_norm": 0.8358145356178284, + "learning_rate": 3.268569075071722e-06, + "loss": 0.0889, + "step": 890 + }, + { + "epoch": 2.4059139784946235, + "grad_norm": 0.7631276845932007, + "learning_rate": 3.131344941347937e-06, + "loss": 0.1131, + "step": 895 + }, + { + "epoch": 2.4193548387096775, + "grad_norm": 0.6635212898254395, + "learning_rate": 2.996727090700794e-06, + "loss": 0.09, + "step": 900 + }, + { + "epoch": 2.432795698924731, + "grad_norm": 0.8987323045730591, + "learning_rate": 2.86474508437579e-06, + "loss": 0.0898, + "step": 905 + }, + { + "epoch": 2.446236559139785, + "grad_norm": 0.7536560297012329, + "learning_rate": 2.7354279048033524e-06, + "loss": 0.0944, + "step": 910 + }, + { + "epoch": 2.4596774193548385, + "grad_norm": 0.7745615839958191, + "learning_rate": 2.6088039492344707e-06, + "loss": 0.0978, + "step": 915 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.7540575265884399, + "learning_rate": 2.4849010235048513e-06, + "loss": 0.0983, + "step": 920 + }, + { + "epoch": 2.486559139784946, + "grad_norm": 0.8482332229614258, + "learning_rate": 2.3637463359288914e-06, + "loss": 0.0995, + "step": 925 + }, + { + "epoch": 2.5, + "grad_norm": 0.9521576166152954, + "learning_rate": 2.2453664913249505e-06, + "loss": 0.1022, + "step": 930 + }, + { + "epoch": 2.513440860215054, + "grad_norm": 0.830055832862854, + "learning_rate": 2.1297874851730575e-06, + "loss": 0.0871, + "step": 935 + }, + { + "epoch": 2.5268817204301075, + "grad_norm": 0.9605406522750854, + "learning_rate": 2.017034697906489e-06, + "loss": 0.0926, + "step": 940 + }, + { + "epoch": 2.540322580645161, + "grad_norm": 0.9175301790237427, + "learning_rate": 1.9071328893383667e-06, + "loss": 0.1007, + "step": 945 + }, + { + "epoch": 2.553763440860215, + "grad_norm": 0.7776777744293213, + "learning_rate": 1.8001061932245654e-06, + "loss": 0.0956, + "step": 950 + }, + { + "epoch": 2.567204301075269, + "grad_norm": 0.8223841786384583, + "learning_rate": 1.6959781119640894e-06, + "loss": 0.1068, + "step": 955 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.872330367565155, + "learning_rate": 1.594771511438095e-06, + "loss": 0.0893, + "step": 960 + }, + { + "epoch": 2.5940860215053765, + "grad_norm": 0.8149677515029907, + "learning_rate": 1.4965086159886694e-06, + "loss": 0.0704, + "step": 965 + }, + { + "epoch": 2.60752688172043, + "grad_norm": 0.8135682344436646, + "learning_rate": 1.401211003538519e-06, + "loss": 0.0997, + "step": 970 + }, + { + "epoch": 2.620967741935484, + "grad_norm": 0.69486004114151, + "learning_rate": 1.308899600852585e-06, + "loss": 0.0823, + "step": 975 + }, + { + "epoch": 2.6344086021505375, + "grad_norm": 0.6869550943374634, + "learning_rate": 1.2195946789426531e-06, + "loss": 0.0814, + "step": 980 + }, + { + "epoch": 2.6478494623655915, + "grad_norm": 1.0490151643753052, + "learning_rate": 1.13331584861597e-06, + "loss": 0.1012, + "step": 985 + }, + { + "epoch": 2.661290322580645, + "grad_norm": 1.0116806030273438, + "learning_rate": 1.0500820561688374e-06, + "loss": 0.0909, + "step": 990 + }, + { + "epoch": 2.674731182795699, + "grad_norm": 0.842189610004425, + "learning_rate": 9.699115792260953e-07, + "loss": 0.088, + "step": 995 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.7198744416236877, + "learning_rate": 8.928220227275086e-07, + "loss": 0.0851, + "step": 1000 + }, + { + "epoch": 2.7016129032258065, + "grad_norm": 0.7036964893341064, + "learning_rate": 8.18830315061801e-07, + "loss": 0.0932, + "step": 1005 + }, + { + "epoch": 2.71505376344086, + "grad_norm": 0.6908620595932007, + "learning_rate": 7.479527043492984e-07, + "loss": 0.0812, + "step": 1010 + }, + { + "epoch": 2.728494623655914, + "grad_norm": 0.7726622819900513, + "learning_rate": 6.802047548739409e-07, + "loss": 0.0873, + "step": 1015 + }, + { + "epoch": 2.741935483870968, + "grad_norm": 0.560405433177948, + "learning_rate": 6.156013436654617e-07, + "loss": 0.0739, + "step": 1020 + }, + { + "epoch": 2.7553763440860215, + "grad_norm": 0.6763749122619629, + "learning_rate": 5.541566572324786e-07, + "loss": 0.0833, + "step": 1025 + }, + { + "epoch": 2.768817204301075, + "grad_norm": 0.8109676837921143, + "learning_rate": 4.958841884472409e-07, + "loss": 0.0892, + "step": 1030 + }, + { + "epoch": 2.782258064516129, + "grad_norm": 0.6238796710968018, + "learning_rate": 4.407967335826585e-07, + "loss": 0.085, + "step": 1035 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.7518003582954407, + "learning_rate": 3.889063895023287e-07, + "loss": 0.0798, + "step": 1040 + }, + { + "epoch": 2.8091397849462365, + "grad_norm": 0.8182872533798218, + "learning_rate": 3.4022455100414184e-07, + "loss": 0.1027, + "step": 1045 + }, + { + "epoch": 2.8225806451612905, + "grad_norm": 0.6480991840362549, + "learning_rate": 2.947619083180525e-07, + "loss": 0.0853, + "step": 1050 + }, + { + "epoch": 2.836021505376344, + "grad_norm": 0.6119154095649719, + "learning_rate": 2.5252844475856906e-07, + "loss": 0.0705, + "step": 1055 + }, + { + "epoch": 2.849462365591398, + "grad_norm": 0.6974361538887024, + "learning_rate": 2.1353343453248408e-07, + "loss": 0.0823, + "step": 1060 + }, + { + "epoch": 2.8629032258064515, + "grad_norm": 0.6691406965255737, + "learning_rate": 1.777854407023083e-07, + "loss": 0.08, + "step": 1065 + }, + { + "epoch": 2.8763440860215055, + "grad_norm": 0.7862398624420166, + "learning_rate": 1.4529231330588644e-07, + "loss": 0.0913, + "step": 1070 + }, + { + "epoch": 2.889784946236559, + "grad_norm": 0.5753769874572754, + "learning_rate": 1.160611876325679e-07, + "loss": 0.0852, + "step": 1075 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.7349390387535095, + "learning_rate": 9.009848265634669e-08, + "loss": 0.0826, + "step": 1080 + }, + { + "epoch": 2.9166666666666665, + "grad_norm": 0.5749334692955017, + "learning_rate": 6.740989962628896e-08, + "loss": 0.0824, + "step": 1085 + }, + { + "epoch": 2.9301075268817205, + "grad_norm": 0.6566449403762817, + "learning_rate": 4.800042081458456e-08, + "loss": 0.0789, + "step": 1090 + }, + { + "epoch": 2.943548387096774, + "grad_norm": 0.7175581455230713, + "learning_rate": 3.187430842245709e-08, + "loss": 0.0892, + "step": 1095 + }, + { + "epoch": 2.956989247311828, + "grad_norm": 0.7799828052520752, + "learning_rate": 1.9035103644222675e-08, + "loss": 0.0855, + "step": 1100 + }, + { + "epoch": 2.970430107526882, + "grad_norm": 0.6160293221473694, + "learning_rate": 9.485625889660288e-09, + "loss": 0.0822, + "step": 1105 + }, + { + "epoch": 2.9838709677419355, + "grad_norm": 0.7389230728149414, + "learning_rate": 3.2279721648870162e-09, + "loss": 0.0794, + "step": 1110 + }, + { + "epoch": 2.997311827956989, + "grad_norm": 0.5900013446807861, + "learning_rate": 2.6351661186974074e-10, + "loss": 0.0817, + "step": 1115 + }, + { + "epoch": 3.0, + "step": 1116, + "total_flos": 1.424766610454872e+18, + "train_loss": 0.41283174582813803, + "train_runtime": 1143.5346, + "train_samples_per_second": 31.214, + "train_steps_per_second": 0.976 + } + ], + "logging_steps": 5, + "max_steps": 1116, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.424766610454872e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7193640e218b7dcccc13d26522fa4fd12ab6ea22 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/13_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d190e59373b6df5ef67048d2490165f9e92f48b6b3b0c2afb5e32e9742d8cf3 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e8088b3b3c0c955fd00c2d75b4dd7f9b3c8f40f6 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 14_128_e3_3e-5 + results: [] +--- + + + +# 14_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3231037808c7c53a1ead7e7833defea9ba3a247 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "q_proj", + "o_proj", + "gate_proj", + "down_proj", + "v_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e6cce3de40cc8e693e82f1b0b1b88eccae488f9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aa43d7531c4d7b151955717b92ebf55996b68110f92a3727598f245706c3522 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8e89bec5b9171ef53227ec65a6e7ff59e51f806d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5053071121236623e+18, + "train_loss": 0.44160460342862684, + "train_runtime": 1177.0931, + "train_samples": 12598, + "train_samples_per_second": 32.108, + "train_steps_per_second": 1.004 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8e89bec5b9171ef53227ec65a6e7ff59e51f806d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5053071121236623e+18, + "train_loss": 0.44160460342862684, + "train_runtime": 1177.0931, + "train_samples": 12598, + "train_samples_per_second": 32.108, + "train_steps_per_second": 1.004 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..977f4a57f28756758ac0a2928463c3dcf1f1fd08 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1695 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1182, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012698412698412698, + "grad_norm": 0.5394964218139648, + "learning_rate": 2e-06, + "loss": 1.5542, + "step": 5 + }, + { + "epoch": 0.025396825396825397, + "grad_norm": 0.6237690448760986, + "learning_rate": 4.5e-06, + "loss": 1.5577, + "step": 10 + }, + { + "epoch": 0.0380952380952381, + "grad_norm": 0.5467448234558105, + "learning_rate": 7e-06, + "loss": 1.5534, + "step": 15 + }, + { + "epoch": 0.050793650793650794, + "grad_norm": 0.5596557855606079, + "learning_rate": 9.5e-06, + "loss": 1.5629, + "step": 20 + }, + { + "epoch": 0.06349206349206349, + "grad_norm": 0.5852771997451782, + "learning_rate": 1.2e-05, + "loss": 1.5481, + "step": 25 + }, + { + "epoch": 0.0761904761904762, + "grad_norm": 0.5108491778373718, + "learning_rate": 1.45e-05, + "loss": 1.472, + "step": 30 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 0.5245486497879028, + "learning_rate": 1.7e-05, + "loss": 1.5296, + "step": 35 + }, + { + "epoch": 0.10158730158730159, + "grad_norm": 0.5260447263717651, + "learning_rate": 1.95e-05, + "loss": 1.52, + "step": 40 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 0.5539010167121887, + "learning_rate": 2.2e-05, + "loss": 1.4476, + "step": 45 + }, + { + "epoch": 0.12698412698412698, + "grad_norm": 0.5047003626823425, + "learning_rate": 2.45e-05, + "loss": 1.4679, + "step": 50 + }, + { + "epoch": 0.13968253968253969, + "grad_norm": 0.5984309315681458, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.4252, + "step": 55 + }, + { + "epoch": 0.1523809523809524, + "grad_norm": 0.6455069780349731, + "learning_rate": 2.95e-05, + "loss": 1.3456, + "step": 60 + }, + { + "epoch": 0.16507936507936508, + "grad_norm": 0.6463902592658997, + "learning_rate": 2.9999059214234348e-05, + "loss": 1.3904, + "step": 65 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.5942544937133789, + "learning_rate": 2.99952374743146e-05, + "loss": 1.329, + "step": 70 + }, + { + "epoch": 0.19047619047619047, + "grad_norm": 0.5599715709686279, + "learning_rate": 2.9988476729599466e-05, + "loss": 1.3107, + "step": 75 + }, + { + "epoch": 0.20317460317460317, + "grad_norm": 0.6521152853965759, + "learning_rate": 2.9978778305167067e-05, + "loss": 1.3702, + "step": 80 + }, + { + "epoch": 0.21587301587301588, + "grad_norm": 0.6484359502792358, + "learning_rate": 2.9966144101868642e-05, + "loss": 1.2579, + "step": 85 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 0.6846063733100891, + "learning_rate": 2.995057659595603e-05, + "loss": 1.3022, + "step": 90 + }, + { + "epoch": 0.24126984126984127, + "grad_norm": 0.8142194151878357, + "learning_rate": 2.993207883859627e-05, + "loss": 1.2448, + "step": 95 + }, + { + "epoch": 0.25396825396825395, + "grad_norm": 0.6492512226104736, + "learning_rate": 2.991065445527364e-05, + "loss": 1.244, + "step": 100 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.6961635947227478, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.1941, + "step": 105 + }, + { + "epoch": 0.27936507936507937, + "grad_norm": 0.7245580554008484, + "learning_rate": 2.985904317988701e-05, + "loss": 1.2101, + "step": 110 + }, + { + "epoch": 0.2920634920634921, + "grad_norm": 0.7226532101631165, + "learning_rate": 2.982886640342046e-05, + "loss": 1.1414, + "step": 115 + }, + { + "epoch": 0.3047619047619048, + "grad_norm": 0.7799679636955261, + "learning_rate": 2.9795783230203305e-05, + "loss": 1.1626, + "step": 120 + }, + { + "epoch": 0.31746031746031744, + "grad_norm": 1.0229713916778564, + "learning_rate": 2.9759800144401265e-05, + "loss": 1.096, + "step": 125 + }, + { + "epoch": 0.33015873015873015, + "grad_norm": 0.902716875076294, + "learning_rate": 2.9720924198550978e-05, + "loss": 1.0846, + "step": 130 + }, + { + "epoch": 0.34285714285714286, + "grad_norm": 0.8179954290390015, + "learning_rate": 2.967916301217774e-05, + "loss": 1.0446, + "step": 135 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.9370952844619751, + "learning_rate": 2.9634524770302118e-05, + "loss": 1.048, + "step": 140 + }, + { + "epoch": 0.3682539682539683, + "grad_norm": 0.8322552442550659, + "learning_rate": 2.9587018221835692e-05, + "loss": 1.0557, + "step": 145 + }, + { + "epoch": 0.38095238095238093, + "grad_norm": 0.8093209266662598, + "learning_rate": 2.953665267786634e-05, + "loss": 1.0054, + "step": 150 + }, + { + "epoch": 0.39365079365079364, + "grad_norm": 1.048215389251709, + "learning_rate": 2.9483438009833267e-05, + "loss": 0.9705, + "step": 155 + }, + { + "epoch": 0.40634920634920635, + "grad_norm": 1.0365275144577026, + "learning_rate": 2.942738464759229e-05, + "loss": 1.0181, + "step": 160 + }, + { + "epoch": 0.41904761904761906, + "grad_norm": 1.0487090349197388, + "learning_rate": 2.9368503577371562e-05, + "loss": 0.9309, + "step": 165 + }, + { + "epoch": 0.43174603174603177, + "grad_norm": 1.0779225826263428, + "learning_rate": 2.9306806339618394e-05, + "loss": 0.9264, + "step": 170 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 1.0937612056732178, + "learning_rate": 2.9242305026737313e-05, + "loss": 0.8917, + "step": 175 + }, + { + "epoch": 0.45714285714285713, + "grad_norm": 0.9135797619819641, + "learning_rate": 2.9175012280720024e-05, + "loss": 0.9971, + "step": 180 + }, + { + "epoch": 0.46984126984126984, + "grad_norm": 0.9684476852416992, + "learning_rate": 2.910494129066766e-05, + "loss": 0.9843, + "step": 185 + }, + { + "epoch": 0.48253968253968255, + "grad_norm": 0.9964056015014648, + "learning_rate": 2.9032105790205696e-05, + "loss": 0.851, + "step": 190 + }, + { + "epoch": 0.49523809523809526, + "grad_norm": 1.0189656019210815, + "learning_rate": 2.8956520054792306e-05, + "loss": 0.8847, + "step": 195 + }, + { + "epoch": 0.5079365079365079, + "grad_norm": 0.9090765118598938, + "learning_rate": 2.887819889892037e-05, + "loss": 0.8271, + "step": 200 + }, + { + "epoch": 0.5206349206349207, + "grad_norm": 0.9835756421089172, + "learning_rate": 2.8797157673213918e-05, + "loss": 0.7971, + "step": 205 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.523632526397705, + "learning_rate": 2.871341226141946e-05, + "loss": 0.8274, + "step": 210 + }, + { + "epoch": 0.546031746031746, + "grad_norm": 1.2632782459259033, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.8545, + "step": 215 + }, + { + "epoch": 0.5587301587301587, + "grad_norm": 1.0179686546325684, + "learning_rate": 2.853787506138226e-05, + "loss": 0.8055, + "step": 220 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 1.1268579959869385, + "learning_rate": 2.844611767770787e-05, + "loss": 0.7261, + "step": 225 + }, + { + "epoch": 0.5841269841269842, + "grad_norm": 1.2493889331817627, + "learning_rate": 2.835172491033903e-05, + "loss": 0.7667, + "step": 230 + }, + { + "epoch": 0.5968253968253968, + "grad_norm": 1.138248085975647, + "learning_rate": 2.825471525986945e-05, + "loss": 0.7938, + "step": 235 + }, + { + "epoch": 0.6095238095238096, + "grad_norm": 1.0656896829605103, + "learning_rate": 2.815510773979113e-05, + "loss": 0.7054, + "step": 240 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 1.1778075695037842, + "learning_rate": 2.8052921872767832e-05, + "loss": 0.7383, + "step": 245 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 1.2424064874649048, + "learning_rate": 2.7948177686808695e-05, + "loss": 0.7346, + "step": 250 + }, + { + "epoch": 0.6476190476190476, + "grad_norm": 1.1305841207504272, + "learning_rate": 2.7840895711342836e-05, + "loss": 0.7116, + "step": 255 + }, + { + "epoch": 0.6603174603174603, + "grad_norm": 1.192408561706543, + "learning_rate": 2.7731096973195664e-05, + "loss": 0.6895, + "step": 260 + }, + { + "epoch": 0.6730158730158731, + "grad_norm": 1.1263285875320435, + "learning_rate": 2.7618802992467718e-05, + "loss": 0.6839, + "step": 265 + }, + { + "epoch": 0.6857142857142857, + "grad_norm": 1.3604676723480225, + "learning_rate": 2.750403577831679e-05, + "loss": 0.695, + "step": 270 + }, + { + "epoch": 0.6984126984126984, + "grad_norm": 1.1663668155670166, + "learning_rate": 2.738681782464426e-05, + "loss": 0.683, + "step": 275 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 1.254376769065857, + "learning_rate": 2.726717210568634e-05, + "loss": 0.6967, + "step": 280 + }, + { + "epoch": 0.7238095238095238, + "grad_norm": 1.1651451587677002, + "learning_rate": 2.7145122071511253e-05, + "loss": 0.6141, + "step": 285 + }, + { + "epoch": 0.7365079365079366, + "grad_norm": 1.1341975927352905, + "learning_rate": 2.7020691643423105e-05, + "loss": 0.7001, + "step": 290 + }, + { + "epoch": 0.7492063492063492, + "grad_norm": 1.2259856462478638, + "learning_rate": 2.6893905209273405e-05, + "loss": 0.6344, + "step": 295 + }, + { + "epoch": 0.7619047619047619, + "grad_norm": 1.1348968744277954, + "learning_rate": 2.676478761868114e-05, + "loss": 0.6209, + "step": 300 + }, + { + "epoch": 0.7746031746031746, + "grad_norm": 1.1974910497665405, + "learning_rate": 2.663336417816238e-05, + "loss": 0.6573, + "step": 305 + }, + { + "epoch": 0.7873015873015873, + "grad_norm": 1.1579430103302002, + "learning_rate": 2.649966064617025e-05, + "loss": 0.62, + "step": 310 + }, + { + "epoch": 0.8, + "grad_norm": 1.293269395828247, + "learning_rate": 2.6363703228046455e-05, + "loss": 0.6719, + "step": 315 + }, + { + "epoch": 0.8126984126984127, + "grad_norm": 1.129618763923645, + "learning_rate": 2.6225518570885083e-05, + "loss": 0.6544, + "step": 320 + }, + { + "epoch": 0.8253968253968254, + "grad_norm": 1.2239248752593994, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.6035, + "step": 325 + }, + { + "epoch": 0.8380952380952381, + "grad_norm": 1.2538188695907593, + "learning_rate": 2.5942576305166044e-05, + "loss": 0.5851, + "step": 330 + }, + { + "epoch": 0.8507936507936508, + "grad_norm": 1.2427327632904053, + "learning_rate": 2.5797874152127325e-05, + "loss": 0.613, + "step": 335 + }, + { + "epoch": 0.8634920634920635, + "grad_norm": 1.3899816274642944, + "learning_rate": 2.5651055660219847e-05, + "loss": 0.5722, + "step": 340 + }, + { + "epoch": 0.8761904761904762, + "grad_norm": 1.2263929843902588, + "learning_rate": 2.550214960526344e-05, + "loss": 0.4996, + "step": 345 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 1.3536083698272705, + "learning_rate": 2.5351185172231683e-05, + "loss": 0.5486, + "step": 350 + }, + { + "epoch": 0.9015873015873016, + "grad_norm": 1.1883643865585327, + "learning_rate": 2.5198191949531785e-05, + "loss": 0.5525, + "step": 355 + }, + { + "epoch": 0.9142857142857143, + "grad_norm": 1.2087585926055908, + "learning_rate": 2.5043199923205382e-05, + "loss": 0.5307, + "step": 360 + }, + { + "epoch": 0.926984126984127, + "grad_norm": 1.403260350227356, + "learning_rate": 2.488623947105138e-05, + "loss": 0.5329, + "step": 365 + }, + { + "epoch": 0.9396825396825397, + "grad_norm": 1.2841590642929077, + "learning_rate": 2.4727341356672057e-05, + "loss": 0.5146, + "step": 370 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 1.2752699851989746, + "learning_rate": 2.4566536723443486e-05, + "loss": 0.5544, + "step": 375 + }, + { + "epoch": 0.9650793650793651, + "grad_norm": 1.3032246828079224, + "learning_rate": 2.44038570884116e-05, + "loss": 0.5204, + "step": 380 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 1.2452208995819092, + "learning_rate": 2.4239334336114955e-05, + "loss": 0.5016, + "step": 385 + }, + { + "epoch": 0.9904761904761905, + "grad_norm": 1.3230150938034058, + "learning_rate": 2.4073000712335516e-05, + "loss": 0.4934, + "step": 390 + }, + { + "epoch": 1.0025396825396826, + "grad_norm": 1.1621359586715698, + "learning_rate": 2.3904888817778583e-05, + "loss": 0.4901, + "step": 395 + }, + { + "epoch": 1.0152380952380953, + "grad_norm": 1.584425926208496, + "learning_rate": 2.373503160168321e-05, + "loss": 0.4071, + "step": 400 + }, + { + "epoch": 1.027936507936508, + "grad_norm": 1.1314722299575806, + "learning_rate": 2.35634623553643e-05, + "loss": 0.4025, + "step": 405 + }, + { + "epoch": 1.0406349206349206, + "grad_norm": 1.3247960805892944, + "learning_rate": 2.3390214705687607e-05, + "loss": 0.4159, + "step": 410 + }, + { + "epoch": 1.0533333333333332, + "grad_norm": 1.4219434261322021, + "learning_rate": 2.321532260847905e-05, + "loss": 0.4579, + "step": 415 + }, + { + "epoch": 1.066031746031746, + "grad_norm": 1.2847566604614258, + "learning_rate": 2.3038820341869486e-05, + "loss": 0.4515, + "step": 420 + }, + { + "epoch": 1.0787301587301588, + "grad_norm": 1.6281241178512573, + "learning_rate": 2.286074249957634e-05, + "loss": 0.3999, + "step": 425 + }, + { + "epoch": 1.0914285714285714, + "grad_norm": 1.1798696517944336, + "learning_rate": 2.2681123984123388e-05, + "loss": 0.4063, + "step": 430 + }, + { + "epoch": 1.104126984126984, + "grad_norm": 1.3741867542266846, + "learning_rate": 2.25e-05, + "loss": 0.4038, + "step": 435 + }, + { + "epoch": 1.116825396825397, + "grad_norm": 1.2776461839675903, + "learning_rate": 2.231740604676121e-05, + "loss": 0.3282, + "step": 440 + }, + { + "epoch": 1.1295238095238096, + "grad_norm": 1.3186806440353394, + "learning_rate": 2.2133377912069934e-05, + "loss": 0.3607, + "step": 445 + }, + { + "epoch": 1.1422222222222222, + "grad_norm": 1.2439967393875122, + "learning_rate": 2.194795166468275e-05, + "loss": 0.3449, + "step": 450 + }, + { + "epoch": 1.154920634920635, + "grad_norm": 1.2322245836257935, + "learning_rate": 2.1761163647380583e-05, + "loss": 0.3467, + "step": 455 + }, + { + "epoch": 1.1676190476190476, + "grad_norm": 1.1756278276443481, + "learning_rate": 2.157305046984563e-05, + "loss": 0.3755, + "step": 460 + }, + { + "epoch": 1.1803174603174602, + "grad_norm": 1.2392399311065674, + "learning_rate": 2.1383649001486055e-05, + "loss": 0.3987, + "step": 465 + }, + { + "epoch": 1.193015873015873, + "grad_norm": 1.1843605041503906, + "learning_rate": 2.1192996364209708e-05, + "loss": 0.371, + "step": 470 + }, + { + "epoch": 1.2057142857142857, + "grad_norm": 1.6622204780578613, + "learning_rate": 2.10011299251484e-05, + "loss": 0.3703, + "step": 475 + }, + { + "epoch": 1.2184126984126984, + "grad_norm": 1.259009838104248, + "learning_rate": 2.080808728933409e-05, + "loss": 0.3463, + "step": 480 + }, + { + "epoch": 1.231111111111111, + "grad_norm": 1.3609812259674072, + "learning_rate": 2.061390629232846e-05, + "loss": 0.3436, + "step": 485 + }, + { + "epoch": 1.243809523809524, + "grad_norm": 1.2488973140716553, + "learning_rate": 2.0418624992807297e-05, + "loss": 0.3841, + "step": 490 + }, + { + "epoch": 1.2565079365079366, + "grad_norm": 1.2770917415618896, + "learning_rate": 2.0222281665101145e-05, + "loss": 0.306, + "step": 495 + }, + { + "epoch": 1.2692063492063492, + "grad_norm": 1.2957097291946411, + "learning_rate": 2.0024914791693707e-05, + "loss": 0.3684, + "step": 500 + }, + { + "epoch": 1.2819047619047619, + "grad_norm": 1.4656062126159668, + "learning_rate": 1.982656305567942e-05, + "loss": 0.3133, + "step": 505 + }, + { + "epoch": 1.2946031746031745, + "grad_norm": 1.2269419431686401, + "learning_rate": 1.962726533318175e-05, + "loss": 0.2911, + "step": 510 + }, + { + "epoch": 1.3073015873015872, + "grad_norm": 1.3182893991470337, + "learning_rate": 1.94270606857336e-05, + "loss": 0.3322, + "step": 515 + }, + { + "epoch": 1.32, + "grad_norm": 1.3440415859222412, + "learning_rate": 1.9225988352621445e-05, + "loss": 0.3404, + "step": 520 + }, + { + "epoch": 1.3326984126984127, + "grad_norm": 1.4496651887893677, + "learning_rate": 1.9024087743194566e-05, + "loss": 0.3, + "step": 525 + }, + { + "epoch": 1.3453968253968254, + "grad_norm": 1.3102903366088867, + "learning_rate": 1.8821398429140996e-05, + "loss": 0.3384, + "step": 530 + }, + { + "epoch": 1.358095238095238, + "grad_norm": 1.5692989826202393, + "learning_rate": 1.8617960136731627e-05, + "loss": 0.3006, + "step": 535 + }, + { + "epoch": 1.370793650793651, + "grad_norm": 1.3984295129776, + "learning_rate": 1.841381273903402e-05, + "loss": 0.301, + "step": 540 + }, + { + "epoch": 1.3834920634920636, + "grad_norm": 1.3871408700942993, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.2964, + "step": 545 + }, + { + "epoch": 1.3961904761904762, + "grad_norm": 1.240869164466858, + "learning_rate": 1.8003550807110744e-05, + "loss": 0.2955, + "step": 550 + }, + { + "epoch": 1.4088888888888889, + "grad_norm": 1.1730655431747437, + "learning_rate": 1.7797516682534294e-05, + "loss": 0.3001, + "step": 555 + }, + { + "epoch": 1.4215873015873015, + "grad_norm": 1.4017055034637451, + "learning_rate": 1.759093425620809e-05, + "loss": 0.2976, + "step": 560 + }, + { + "epoch": 1.4342857142857142, + "grad_norm": 1.3266613483428955, + "learning_rate": 1.7383844017436998e-05, + "loss": 0.3229, + "step": 565 + }, + { + "epoch": 1.446984126984127, + "grad_norm": 1.1362383365631104, + "learning_rate": 1.717628655505503e-05, + "loss": 0.2775, + "step": 570 + }, + { + "epoch": 1.4596825396825397, + "grad_norm": 1.22533118724823, + "learning_rate": 1.6968302549470097e-05, + "loss": 0.2987, + "step": 575 + }, + { + "epoch": 1.4723809523809523, + "grad_norm": 1.4103273153305054, + "learning_rate": 1.6759932764690834e-05, + "loss": 0.314, + "step": 580 + }, + { + "epoch": 1.485079365079365, + "grad_norm": 1.3505021333694458, + "learning_rate": 1.6551218040336994e-05, + "loss": 0.2536, + "step": 585 + }, + { + "epoch": 1.4977777777777779, + "grad_norm": 1.3529834747314453, + "learning_rate": 1.6342199283635086e-05, + "loss": 0.2537, + "step": 590 + }, + { + "epoch": 1.5104761904761905, + "grad_norm": 1.205641269683838, + "learning_rate": 1.6132917461400685e-05, + "loss": 0.2491, + "step": 595 + }, + { + "epoch": 1.5231746031746032, + "grad_norm": 1.4195793867111206, + "learning_rate": 1.5923413592009145e-05, + "loss": 0.247, + "step": 600 + }, + { + "epoch": 1.5358730158730158, + "grad_norm": 1.4223089218139648, + "learning_rate": 1.5713728737356138e-05, + "loss": 0.2355, + "step": 605 + }, + { + "epoch": 1.5485714285714285, + "grad_norm": 1.1247057914733887, + "learning_rate": 1.5503903994809704e-05, + "loss": 0.2617, + "step": 610 + }, + { + "epoch": 1.5612698412698411, + "grad_norm": 1.2796422243118286, + "learning_rate": 1.5293980489155335e-05, + "loss": 0.2517, + "step": 615 + }, + { + "epoch": 1.573968253968254, + "grad_norm": 1.3102385997772217, + "learning_rate": 1.508399936453569e-05, + "loss": 0.2451, + "step": 620 + }, + { + "epoch": 1.5866666666666667, + "grad_norm": 1.4135396480560303, + "learning_rate": 1.4874001776386501e-05, + "loss": 0.2547, + "step": 625 + }, + { + "epoch": 1.5993650793650793, + "grad_norm": 1.3633917570114136, + "learning_rate": 1.4664028883370285e-05, + "loss": 0.2665, + "step": 630 + }, + { + "epoch": 1.6120634920634922, + "grad_norm": 1.3359363079071045, + "learning_rate": 1.4454121839309416e-05, + "loss": 0.2337, + "step": 635 + }, + { + "epoch": 1.6247619047619049, + "grad_norm": 1.3503613471984863, + "learning_rate": 1.4244321785120149e-05, + "loss": 0.244, + "step": 640 + }, + { + "epoch": 1.6374603174603175, + "grad_norm": 1.0966659784317017, + "learning_rate": 1.4034669840749155e-05, + "loss": 0.2427, + "step": 645 + }, + { + "epoch": 1.6501587301587302, + "grad_norm": 1.259787917137146, + "learning_rate": 1.3825207097114201e-05, + "loss": 0.2601, + "step": 650 + }, + { + "epoch": 1.6628571428571428, + "grad_norm": 1.2575656175613403, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.2323, + "step": 655 + }, + { + "epoch": 1.6755555555555555, + "grad_norm": 1.3066540956497192, + "learning_rate": 1.3407013382264229e-05, + "loss": 0.2422, + "step": 660 + }, + { + "epoch": 1.6882539682539681, + "grad_norm": 1.0777665376663208, + "learning_rate": 1.3198364375295225e-05, + "loss": 0.2249, + "step": 665 + }, + { + "epoch": 1.700952380952381, + "grad_norm": 1.240071415901184, + "learning_rate": 1.2990068481489631e-05, + "loss": 0.2282, + "step": 670 + }, + { + "epoch": 1.7136507936507936, + "grad_norm": 1.3707153797149658, + "learning_rate": 1.278216652598487e-05, + "loss": 0.228, + "step": 675 + }, + { + "epoch": 1.7263492063492063, + "grad_norm": 1.236106038093567, + "learning_rate": 1.2574699256708104e-05, + "loss": 0.214, + "step": 680 + }, + { + "epoch": 1.7390476190476192, + "grad_norm": 1.2130684852600098, + "learning_rate": 1.2367707336389761e-05, + "loss": 0.1897, + "step": 685 + }, + { + "epoch": 1.7517460317460318, + "grad_norm": 1.1631022691726685, + "learning_rate": 1.2161231334593853e-05, + "loss": 0.2344, + "step": 690 + }, + { + "epoch": 1.7644444444444445, + "grad_norm": 1.1569691896438599, + "learning_rate": 1.1955311719766463e-05, + "loss": 0.1996, + "step": 695 + }, + { + "epoch": 1.7771428571428571, + "grad_norm": 1.2000324726104736, + "learning_rate": 1.1749988851304141e-05, + "loss": 0.1675, + "step": 700 + }, + { + "epoch": 1.7898412698412698, + "grad_norm": 1.3367247581481934, + "learning_rate": 1.1545302971643591e-05, + "loss": 0.2122, + "step": 705 + }, + { + "epoch": 1.8025396825396824, + "grad_norm": 1.243788719177246, + "learning_rate": 1.1341294198374342e-05, + "loss": 0.1945, + "step": 710 + }, + { + "epoch": 1.815238095238095, + "grad_norm": 1.4781830310821533, + "learning_rate": 1.1138002516375865e-05, + "loss": 0.2031, + "step": 715 + }, + { + "epoch": 1.827936507936508, + "grad_norm": 1.2040355205535889, + "learning_rate": 1.0935467769980678e-05, + "loss": 0.1812, + "step": 720 + }, + { + "epoch": 1.8406349206349206, + "grad_norm": 1.264350414276123, + "learning_rate": 1.0733729655165056e-05, + "loss": 0.1845, + "step": 725 + }, + { + "epoch": 1.8533333333333335, + "grad_norm": 1.5212574005126953, + "learning_rate": 1.0532827711768747e-05, + "loss": 0.2176, + "step": 730 + }, + { + "epoch": 1.8660317460317462, + "grad_norm": 1.3029937744140625, + "learning_rate": 1.0332801315745363e-05, + "loss": 0.2149, + "step": 735 + }, + { + "epoch": 1.8787301587301588, + "grad_norm": 1.3432649374008179, + "learning_rate": 1.0133689671444817e-05, + "loss": 0.1894, + "step": 740 + }, + { + "epoch": 1.8914285714285715, + "grad_norm": 1.303925633430481, + "learning_rate": 9.93553180392947e-06, + "loss": 0.1836, + "step": 745 + }, + { + "epoch": 1.9041269841269841, + "grad_norm": 1.3186211585998535, + "learning_rate": 9.738366551325343e-06, + "loss": 0.1795, + "step": 750 + }, + { + "epoch": 1.9168253968253968, + "grad_norm": 1.2385101318359375, + "learning_rate": 9.542232557210039e-06, + "loss": 0.1863, + "step": 755 + }, + { + "epoch": 1.9295238095238094, + "grad_norm": 1.049270749092102, + "learning_rate": 9.347168263038728e-06, + "loss": 0.1618, + "step": 760 + }, + { + "epoch": 1.942222222222222, + "grad_norm": 1.2258292436599731, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1705, + "step": 765 + }, + { + "epoch": 1.954920634920635, + "grad_norm": 1.0637770891189575, + "learning_rate": 8.960401484571612e-06, + "loss": 0.1943, + "step": 770 + }, + { + "epoch": 1.9676190476190476, + "grad_norm": 1.2467912435531616, + "learning_rate": 8.768774804971705e-06, + "loss": 0.1666, + "step": 775 + }, + { + "epoch": 1.9803174603174605, + "grad_norm": 1.3742448091506958, + "learning_rate": 8.57836941985017e-06, + "loss": 0.1821, + "step": 780 + }, + { + "epoch": 1.9930158730158731, + "grad_norm": 1.2060065269470215, + "learning_rate": 8.389222647878426e-06, + "loss": 0.1663, + "step": 785 + }, + { + "epoch": 2.0050793650793652, + "grad_norm": 1.0109996795654297, + "learning_rate": 8.201371561044864e-06, + "loss": 0.158, + "step": 790 + }, + { + "epoch": 2.017777777777778, + "grad_norm": 1.2141554355621338, + "learning_rate": 8.014852977388965e-06, + "loss": 0.1623, + "step": 795 + }, + { + "epoch": 2.0304761904761905, + "grad_norm": 1.0854544639587402, + "learning_rate": 7.82970345378503e-06, + "loss": 0.1492, + "step": 800 + }, + { + "epoch": 2.043174603174603, + "grad_norm": 0.8425107598304749, + "learning_rate": 7.645959278777271e-06, + "loss": 0.129, + "step": 805 + }, + { + "epoch": 2.055873015873016, + "grad_norm": 1.0444252490997314, + "learning_rate": 7.46365646546732e-06, + "loss": 0.1217, + "step": 810 + }, + { + "epoch": 2.0685714285714285, + "grad_norm": 1.3091061115264893, + "learning_rate": 7.282830744455896e-06, + "loss": 0.1306, + "step": 815 + }, + { + "epoch": 2.081269841269841, + "grad_norm": 1.2025136947631836, + "learning_rate": 7.103517556839661e-06, + "loss": 0.1187, + "step": 820 + }, + { + "epoch": 2.093968253968254, + "grad_norm": 0.8649110198020935, + "learning_rate": 6.925752047265011e-06, + "loss": 0.1092, + "step": 825 + }, + { + "epoch": 2.1066666666666665, + "grad_norm": 0.9416567087173462, + "learning_rate": 6.749569057039771e-06, + "loss": 0.1532, + "step": 830 + }, + { + "epoch": 2.1193650793650796, + "grad_norm": 1.1786872148513794, + "learning_rate": 6.575003117304535e-06, + "loss": 0.1101, + "step": 835 + }, + { + "epoch": 2.132063492063492, + "grad_norm": 0.9411061406135559, + "learning_rate": 6.402088442264615e-06, + "loss": 0.1185, + "step": 840 + }, + { + "epoch": 2.144761904761905, + "grad_norm": 0.9576282501220703, + "learning_rate": 6.230858922484289e-06, + "loss": 0.1363, + "step": 845 + }, + { + "epoch": 2.1574603174603175, + "grad_norm": 1.0527238845825195, + "learning_rate": 6.061348118244298e-06, + "loss": 0.1201, + "step": 850 + }, + { + "epoch": 2.17015873015873, + "grad_norm": 0.9936991333961487, + "learning_rate": 5.893589252964258e-06, + "loss": 0.1043, + "step": 855 + }, + { + "epoch": 2.182857142857143, + "grad_norm": 0.9852300882339478, + "learning_rate": 5.727615206690921e-06, + "loss": 0.1113, + "step": 860 + }, + { + "epoch": 2.1955555555555555, + "grad_norm": 1.0078802108764648, + "learning_rate": 5.563458509653905e-06, + "loss": 0.1285, + "step": 865 + }, + { + "epoch": 2.208253968253968, + "grad_norm": 1.3041237592697144, + "learning_rate": 5.401151335889819e-06, + "loss": 0.1118, + "step": 870 + }, + { + "epoch": 2.220952380952381, + "grad_norm": 1.4168388843536377, + "learning_rate": 5.240725496936373e-06, + "loss": 0.1206, + "step": 875 + }, + { + "epoch": 2.233650793650794, + "grad_norm": 0.9101023077964783, + "learning_rate": 5.082212435597352e-06, + "loss": 0.1217, + "step": 880 + }, + { + "epoch": 2.2463492063492065, + "grad_norm": 1.2119768857955933, + "learning_rate": 4.925643219780053e-06, + "loss": 0.118, + "step": 885 + }, + { + "epoch": 2.259047619047619, + "grad_norm": 1.0441137552261353, + "learning_rate": 4.771048536406012e-06, + "loss": 0.1109, + "step": 890 + }, + { + "epoch": 2.271746031746032, + "grad_norm": 1.1253547668457031, + "learning_rate": 4.618458685396579e-06, + "loss": 0.1252, + "step": 895 + }, + { + "epoch": 2.2844444444444445, + "grad_norm": 1.3475861549377441, + "learning_rate": 4.467903573734174e-06, + "loss": 0.1093, + "step": 900 + }, + { + "epoch": 2.297142857142857, + "grad_norm": 0.845414936542511, + "learning_rate": 4.319412709600723e-06, + "loss": 0.093, + "step": 905 + }, + { + "epoch": 2.30984126984127, + "grad_norm": 0.9166451692581177, + "learning_rate": 4.173015196594103e-06, + "loss": 0.1063, + "step": 910 + }, + { + "epoch": 2.3225396825396825, + "grad_norm": 0.9841115474700928, + "learning_rate": 4.028739728024023e-06, + "loss": 0.1338, + "step": 915 + }, + { + "epoch": 2.335238095238095, + "grad_norm": 1.0335733890533447, + "learning_rate": 3.886614581288187e-06, + "loss": 0.1147, + "step": 920 + }, + { + "epoch": 2.3479365079365078, + "grad_norm": 0.8984823226928711, + "learning_rate": 3.746667612330109e-06, + "loss": 0.1149, + "step": 925 + }, + { + "epoch": 2.3606349206349204, + "grad_norm": 0.8267236351966858, + "learning_rate": 3.608926250179392e-06, + "loss": 0.1115, + "step": 930 + }, + { + "epoch": 2.3733333333333335, + "grad_norm": 0.8341403603553772, + "learning_rate": 3.4734174915758245e-06, + "loss": 0.1083, + "step": 935 + }, + { + "epoch": 2.386031746031746, + "grad_norm": 0.9599093198776245, + "learning_rate": 3.340167895678059e-06, + "loss": 0.1043, + "step": 940 + }, + { + "epoch": 2.398730158730159, + "grad_norm": 0.8416098356246948, + "learning_rate": 3.2092035788581907e-06, + "loss": 0.1125, + "step": 945 + }, + { + "epoch": 2.4114285714285715, + "grad_norm": 0.9431564211845398, + "learning_rate": 3.0805502095829987e-06, + "loss": 0.0954, + "step": 950 + }, + { + "epoch": 2.424126984126984, + "grad_norm": 0.8993948698043823, + "learning_rate": 2.954233003383089e-06, + "loss": 0.099, + "step": 955 + }, + { + "epoch": 2.436825396825397, + "grad_norm": 0.966899573802948, + "learning_rate": 2.830276717910692e-06, + "loss": 0.127, + "step": 960 + }, + { + "epoch": 2.4495238095238094, + "grad_norm": 1.119049072265625, + "learning_rate": 2.7087056480873322e-06, + "loss": 0.1105, + "step": 965 + }, + { + "epoch": 2.462222222222222, + "grad_norm": 0.7644155025482178, + "learning_rate": 2.5895436213420566e-06, + "loss": 0.0999, + "step": 970 + }, + { + "epoch": 2.4749206349206347, + "grad_norm": 0.7740854620933533, + "learning_rate": 2.4728139929414185e-06, + "loss": 0.1044, + "step": 975 + }, + { + "epoch": 2.487619047619048, + "grad_norm": 0.9954528212547302, + "learning_rate": 2.358539641411883e-06, + "loss": 0.1282, + "step": 980 + }, + { + "epoch": 2.5003174603174605, + "grad_norm": 1.2290841341018677, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.0985, + "step": 985 + }, + { + "epoch": 2.513015873015873, + "grad_norm": 0.9200103282928467, + "learning_rate": 2.1374458725615147e-06, + "loss": 0.1012, + "step": 990 + }, + { + "epoch": 2.525714285714286, + "grad_norm": 0.8433984518051147, + "learning_rate": 2.0306697887089238e-06, + "loss": 0.1094, + "step": 995 + }, + { + "epoch": 2.5384126984126985, + "grad_norm": 0.8732439279556274, + "learning_rate": 1.9264356401707402e-06, + "loss": 0.099, + "step": 1000 + }, + { + "epoch": 2.551111111111111, + "grad_norm": 0.8666094541549683, + "learning_rate": 1.824763856410861e-06, + "loss": 0.1015, + "step": 1005 + }, + { + "epoch": 2.5638095238095238, + "grad_norm": 0.851259171962738, + "learning_rate": 1.7256743646802092e-06, + "loss": 0.1132, + "step": 1010 + }, + { + "epoch": 2.5765079365079364, + "grad_norm": 0.7273902893066406, + "learning_rate": 1.6291865861111354e-06, + "loss": 0.1025, + "step": 1015 + }, + { + "epoch": 2.589206349206349, + "grad_norm": 0.9177793860435486, + "learning_rate": 1.5353194319108916e-06, + "loss": 0.095, + "step": 1020 + }, + { + "epoch": 2.6019047619047617, + "grad_norm": 0.8048842549324036, + "learning_rate": 1.4440912996551753e-06, + "loss": 0.1039, + "step": 1025 + }, + { + "epoch": 2.6146031746031744, + "grad_norm": 0.9663947820663452, + "learning_rate": 1.3555200696822235e-06, + "loss": 0.1074, + "step": 1030 + }, + { + "epoch": 2.627301587301587, + "grad_norm": 0.8551552891731262, + "learning_rate": 1.2696231015883914e-06, + "loss": 0.0902, + "step": 1035 + }, + { + "epoch": 2.64, + "grad_norm": 0.9103640913963318, + "learning_rate": 1.186417230825695e-06, + "loss": 0.1182, + "step": 1040 + }, + { + "epoch": 2.6526984126984128, + "grad_norm": 0.8719521760940552, + "learning_rate": 1.1059187654021764e-06, + "loss": 0.1067, + "step": 1045 + }, + { + "epoch": 2.6653968253968254, + "grad_norm": 0.7704371809959412, + "learning_rate": 1.0281434826855647e-06, + "loss": 0.1026, + "step": 1050 + }, + { + "epoch": 2.678095238095238, + "grad_norm": 0.7950895428657532, + "learning_rate": 9.531066263109972e-07, + "loss": 0.0934, + "step": 1055 + }, + { + "epoch": 2.6907936507936507, + "grad_norm": 0.7548843026161194, + "learning_rate": 8.808229031933285e-07, + "loss": 0.0962, + "step": 1060 + }, + { + "epoch": 2.7034920634920634, + "grad_norm": 0.7494012713432312, + "learning_rate": 8.113064806446286e-07, + "loss": 0.1018, + "step": 1065 + }, + { + "epoch": 2.716190476190476, + "grad_norm": 0.6789062023162842, + "learning_rate": 7.445709835974512e-07, + "loss": 0.0948, + "step": 1070 + }, + { + "epoch": 2.728888888888889, + "grad_norm": 0.9267260432243347, + "learning_rate": 6.806294919344053e-07, + "loss": 0.1068, + "step": 1075 + }, + { + "epoch": 2.741587301587302, + "grad_norm": 0.898684024810791, + "learning_rate": 6.194945379245498e-07, + "loss": 0.1123, + "step": 1080 + }, + { + "epoch": 2.7542857142857144, + "grad_norm": 0.7808497548103333, + "learning_rate": 5.611781037671176e-07, + "loss": 0.1082, + "step": 1085 + }, + { + "epoch": 2.766984126984127, + "grad_norm": 0.870790421962738, + "learning_rate": 5.056916192430622e-07, + "loss": 0.103, + "step": 1090 + }, + { + "epoch": 2.7796825396825398, + "grad_norm": 0.7754330039024353, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.0956, + "step": 1095 + }, + { + "epoch": 2.7923809523809524, + "grad_norm": 0.772956132888794, + "learning_rate": 4.032514427950307e-07, + "loss": 0.0924, + "step": 1100 + }, + { + "epoch": 2.805079365079365, + "grad_norm": 0.7881432175636292, + "learning_rate": 3.56317828723795e-07, + "loss": 0.0872, + "step": 1105 + }, + { + "epoch": 2.8177777777777777, + "grad_norm": 0.8554707765579224, + "learning_rate": 3.1225431605624134e-07, + "loss": 0.0905, + "step": 1110 + }, + { + "epoch": 2.8304761904761904, + "grad_norm": 0.9466358423233032, + "learning_rate": 2.710695410593994e-07, + "loss": 0.1005, + "step": 1115 + }, + { + "epoch": 2.843174603174603, + "grad_norm": 0.9236406087875366, + "learning_rate": 2.3277157577957398e-07, + "loss": 0.1006, + "step": 1120 + }, + { + "epoch": 2.8558730158730157, + "grad_norm": 0.7827917337417603, + "learning_rate": 1.9736792646024847e-07, + "loss": 0.0935, + "step": 1125 + }, + { + "epoch": 2.8685714285714283, + "grad_norm": 0.7761815190315247, + "learning_rate": 1.6486553207090194e-07, + "loss": 0.0984, + "step": 1130 + }, + { + "epoch": 2.8812698412698414, + "grad_norm": 0.7797147631645203, + "learning_rate": 1.3527076294698848e-07, + "loss": 0.0903, + "step": 1135 + }, + { + "epoch": 2.893968253968254, + "grad_norm": 1.053987979888916, + "learning_rate": 1.0858941954139112e-07, + "loss": 0.0911, + "step": 1140 + }, + { + "epoch": 2.9066666666666667, + "grad_norm": 0.8876226544380188, + "learning_rate": 8.482673128753948e-08, + "loss": 0.0844, + "step": 1145 + }, + { + "epoch": 2.9193650793650794, + "grad_norm": 0.6115774512290955, + "learning_rate": 6.398735557448299e-08, + "loss": 0.0844, + "step": 1150 + }, + { + "epoch": 2.932063492063492, + "grad_norm": 0.8332976698875427, + "learning_rate": 4.607537683404106e-08, + "loss": 0.0924, + "step": 1155 + }, + { + "epoch": 2.9447619047619047, + "grad_norm": 0.9227467775344849, + "learning_rate": 3.1094305740291816e-08, + "loss": 0.1112, + "step": 1160 + }, + { + "epoch": 2.9574603174603173, + "grad_norm": 0.7906195521354675, + "learning_rate": 1.9047078521474137e-08, + "loss": 0.1228, + "step": 1165 + }, + { + "epoch": 2.97015873015873, + "grad_norm": 0.8311947584152222, + "learning_rate": 9.93605638451467e-09, + "loss": 0.0978, + "step": 1170 + }, + { + "epoch": 2.982857142857143, + "grad_norm": 0.8368114233016968, + "learning_rate": 3.763025052231361e-09, + "loss": 0.1047, + "step": 1175 + }, + { + "epoch": 2.9955555555555557, + "grad_norm": 0.6747046113014221, + "learning_rate": 5.29194413350087e-10, + "loss": 0.0941, + "step": 1180 + }, + { + "epoch": 3.0, + "step": 1182, + "total_flos": 1.5053071121236623e+18, + "train_loss": 0.44160460342862684, + "train_runtime": 1177.0931, + "train_samples_per_second": 32.108, + "train_steps_per_second": 1.004 + } + ], + "logging_steps": 5, + "max_steps": 1182, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5053071121236623e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..819ac00e35e3d4e70c601eddab84a19e76030b35 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/14_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4080d0c461e8d8684624a350941ad3c0261ac6b3d1d3767acf477763d63bdcde +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7ed483deddfaf790787837dd13c91da4f2f06473 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 15_128_e3_3e-5 + results: [] +--- + + + +# 15_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9f14c5afdae6bdc41f021fc23220ebe28c10e654 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "k_proj", + "q_proj", + "o_proj", + "v_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b905c35578549490fbdef2b5f198e87e6979a9e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0d2fd68f3fdcd2d478c52c5264a483de63e2c7cddb6496fbc150f12e549f27 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c15f5c33e95956da02a77c373c9bf89e138f132 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3289249601686077e+18, + "train_loss": 0.4596675281218216, + "train_runtime": 1048.95, + "train_samples": 11307, + "train_samples_per_second": 32.338, + "train_steps_per_second": 1.012 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c15f5c33e95956da02a77c373c9bf89e138f132 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3289249601686077e+18, + "train_loss": 0.4596675281218216, + "train_runtime": 1048.95, + "train_samples": 11307, + "train_samples_per_second": 32.338, + "train_steps_per_second": 1.012 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d96cb646e20e11ea67e89ff03eb22d44a8105d0b --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1527 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1062, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014144271570014143, + "grad_norm": 0.6470269560813904, + "learning_rate": 2.222222222222222e-06, + "loss": 1.5459, + "step": 5 + }, + { + "epoch": 0.028288543140028287, + "grad_norm": 0.5453058481216431, + "learning_rate": 4.9999999999999996e-06, + "loss": 1.6095, + "step": 10 + }, + { + "epoch": 0.042432814710042434, + "grad_norm": 0.5793577432632446, + "learning_rate": 7.777777777777777e-06, + "loss": 1.6428, + "step": 15 + }, + { + "epoch": 0.056577086280056574, + "grad_norm": 0.5567788481712341, + "learning_rate": 1.0555555555555555e-05, + "loss": 1.627, + "step": 20 + }, + { + "epoch": 0.07072135785007072, + "grad_norm": 0.547249436378479, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.5998, + "step": 25 + }, + { + "epoch": 0.08486562942008487, + "grad_norm": 0.5249947309494019, + "learning_rate": 1.6111111111111115e-05, + "loss": 1.5876, + "step": 30 + }, + { + "epoch": 0.09900990099009901, + "grad_norm": 0.5290120840072632, + "learning_rate": 1.888888888888889e-05, + "loss": 1.563, + "step": 35 + }, + { + "epoch": 0.11315417256011315, + "grad_norm": 0.48718002438545227, + "learning_rate": 2.1666666666666667e-05, + "loss": 1.5065, + "step": 40 + }, + { + "epoch": 0.1272984441301273, + "grad_norm": 0.5126219391822815, + "learning_rate": 2.4444444444444445e-05, + "loss": 1.4936, + "step": 45 + }, + { + "epoch": 0.14144271570014144, + "grad_norm": 0.513758659362793, + "learning_rate": 2.7222222222222223e-05, + "loss": 1.4777, + "step": 50 + }, + { + "epoch": 0.15558698727015557, + "grad_norm": 0.5639211535453796, + "learning_rate": 3e-05, + "loss": 1.3452, + "step": 55 + }, + { + "epoch": 0.16973125884016974, + "grad_norm": 0.5538976192474365, + "learning_rate": 2.9998178743291113e-05, + "loss": 1.3865, + "step": 60 + }, + { + "epoch": 0.18387553041018387, + "grad_norm": 0.6276597380638123, + "learning_rate": 2.9992715415427915e-05, + "loss": 1.3889, + "step": 65 + }, + { + "epoch": 0.19801980198019803, + "grad_norm": 1.0833052396774292, + "learning_rate": 2.998361134309341e-05, + "loss": 1.3592, + "step": 70 + }, + { + "epoch": 0.21216407355021216, + "grad_norm": 0.5610765218734741, + "learning_rate": 2.997086873706798e-05, + "loss": 1.3032, + "step": 75 + }, + { + "epoch": 0.2263083451202263, + "grad_norm": 0.7062208652496338, + "learning_rate": 2.995449069169251e-05, + "loss": 1.2942, + "step": 80 + }, + { + "epoch": 0.24045261669024046, + "grad_norm": 0.6978217959403992, + "learning_rate": 2.9934481184117005e-05, + "loss": 1.1986, + "step": 85 + }, + { + "epoch": 0.2545968882602546, + "grad_norm": 0.7358289361000061, + "learning_rate": 2.9910845073334793e-05, + "loss": 1.2552, + "step": 90 + }, + { + "epoch": 0.26874115983026875, + "grad_norm": 0.676780641078949, + "learning_rate": 2.9883588099002583e-05, + "loss": 1.2605, + "step": 95 + }, + { + "epoch": 0.2828854314002829, + "grad_norm": 0.69721919298172, + "learning_rate": 2.9852716880046687e-05, + "loss": 1.2891, + "step": 100 + }, + { + "epoch": 0.297029702970297, + "grad_norm": 0.6865009069442749, + "learning_rate": 2.981823891305572e-05, + "loss": 1.1911, + "step": 105 + }, + { + "epoch": 0.31117397454031115, + "grad_norm": 0.7750943303108215, + "learning_rate": 2.9780162570460182e-05, + "loss": 1.1786, + "step": 110 + }, + { + "epoch": 0.32531824611032534, + "grad_norm": 0.8052558302879333, + "learning_rate": 2.9738497098499325e-05, + "loss": 1.0951, + "step": 115 + }, + { + "epoch": 0.33946251768033947, + "grad_norm": 0.8017142415046692, + "learning_rate": 2.969325261497586e-05, + "loss": 1.0663, + "step": 120 + }, + { + "epoch": 0.3536067892503536, + "grad_norm": 0.7748960256576538, + "learning_rate": 2.9644440106799003e-05, + "loss": 1.1259, + "step": 125 + }, + { + "epoch": 0.36775106082036774, + "grad_norm": 1.0059736967086792, + "learning_rate": 2.9592071427316492e-05, + "loss": 1.0839, + "step": 130 + }, + { + "epoch": 0.38189533239038187, + "grad_norm": 1.0049115419387817, + "learning_rate": 2.9536159293436172e-05, + "loss": 1.0872, + "step": 135 + }, + { + "epoch": 0.39603960396039606, + "grad_norm": 0.8260437250137329, + "learning_rate": 2.9476717282537897e-05, + "loss": 1.0318, + "step": 140 + }, + { + "epoch": 0.4101838755304102, + "grad_norm": 0.9169187545776367, + "learning_rate": 2.9413759829176497e-05, + "loss": 1.053, + "step": 145 + }, + { + "epoch": 0.4243281471004243, + "grad_norm": 0.8924924731254578, + "learning_rate": 2.934730222157653e-05, + "loss": 0.9422, + "step": 150 + }, + { + "epoch": 0.43847241867043846, + "grad_norm": 1.0817251205444336, + "learning_rate": 2.927736059791984e-05, + "loss": 0.9689, + "step": 155 + }, + { + "epoch": 0.4526166902404526, + "grad_norm": 1.012832760810852, + "learning_rate": 2.9203951942426587e-05, + "loss": 0.981, + "step": 160 + }, + { + "epoch": 0.4667609618104668, + "grad_norm": 0.9768096208572388, + "learning_rate": 2.9127094081230952e-05, + "loss": 0.9646, + "step": 165 + }, + { + "epoch": 0.4809052333804809, + "grad_norm": 1.0733317136764526, + "learning_rate": 2.9046805678052312e-05, + "loss": 0.9013, + "step": 170 + }, + { + "epoch": 0.49504950495049505, + "grad_norm": 1.0418061017990112, + "learning_rate": 2.8963106229663064e-05, + "loss": 0.9844, + "step": 175 + }, + { + "epoch": 0.5091937765205092, + "grad_norm": 1.0275667905807495, + "learning_rate": 2.8876016061154125e-05, + "loss": 0.8985, + "step": 180 + }, + { + "epoch": 0.5233380480905233, + "grad_norm": 1.0465189218521118, + "learning_rate": 2.8785556320999308e-05, + "loss": 0.8718, + "step": 185 + }, + { + "epoch": 0.5374823196605375, + "grad_norm": 1.1605640649795532, + "learning_rate": 2.8691748975919784e-05, + "loss": 0.8478, + "step": 190 + }, + { + "epoch": 0.5516265912305516, + "grad_norm": 1.2359672784805298, + "learning_rate": 2.8594616805549752e-05, + "loss": 0.8294, + "step": 195 + }, + { + "epoch": 0.5657708628005658, + "grad_norm": 1.0865767002105713, + "learning_rate": 2.84941833969048e-05, + "loss": 0.7984, + "step": 200 + }, + { + "epoch": 0.57991513437058, + "grad_norm": 1.2092185020446777, + "learning_rate": 2.8390473138654175e-05, + "loss": 0.8279, + "step": 205 + }, + { + "epoch": 0.594059405940594, + "grad_norm": 1.113460659980774, + "learning_rate": 2.828351121519835e-05, + "loss": 0.7783, + "step": 210 + }, + { + "epoch": 0.6082036775106082, + "grad_norm": 1.2185325622558594, + "learning_rate": 2.817332360055343e-05, + "loss": 0.7665, + "step": 215 + }, + { + "epoch": 0.6223479490806223, + "grad_norm": 1.152376413345337, + "learning_rate": 2.8059937052043724e-05, + "loss": 0.8006, + "step": 220 + }, + { + "epoch": 0.6364922206506365, + "grad_norm": 1.1894851922988892, + "learning_rate": 2.7943379103804196e-05, + "loss": 0.7701, + "step": 225 + }, + { + "epoch": 0.6506364922206507, + "grad_norm": 1.191725730895996, + "learning_rate": 2.7823678060094198e-05, + "loss": 0.7128, + "step": 230 + }, + { + "epoch": 0.6647807637906648, + "grad_norm": 1.188659906387329, + "learning_rate": 2.770086298842426e-05, + "loss": 0.7058, + "step": 235 + }, + { + "epoch": 0.6789250353606789, + "grad_norm": 1.1337882280349731, + "learning_rate": 2.7574963712497486e-05, + "loss": 0.7903, + "step": 240 + }, + { + "epoch": 0.693069306930693, + "grad_norm": 1.302744746208191, + "learning_rate": 2.7446010804967314e-05, + "loss": 0.7229, + "step": 245 + }, + { + "epoch": 0.7072135785007072, + "grad_norm": 1.342345118522644, + "learning_rate": 2.7314035580013487e-05, + "loss": 0.7177, + "step": 250 + }, + { + "epoch": 0.7213578500707214, + "grad_norm": 1.1452064514160156, + "learning_rate": 2.717907008573785e-05, + "loss": 0.7376, + "step": 255 + }, + { + "epoch": 0.7355021216407355, + "grad_norm": 1.3301513195037842, + "learning_rate": 2.7041147096381988e-05, + "loss": 0.6625, + "step": 260 + }, + { + "epoch": 0.7496463932107497, + "grad_norm": 1.2731245756149292, + "learning_rate": 2.6900300104368527e-05, + "loss": 0.6923, + "step": 265 + }, + { + "epoch": 0.7637906647807637, + "grad_norm": 1.324074149131775, + "learning_rate": 2.6756563312168022e-05, + "loss": 0.6773, + "step": 270 + }, + { + "epoch": 0.7779349363507779, + "grad_norm": 1.227583408355713, + "learning_rate": 2.6609971623993412e-05, + "loss": 0.5809, + "step": 275 + }, + { + "epoch": 0.7920792079207921, + "grad_norm": 1.2684015035629272, + "learning_rate": 2.6460560637324113e-05, + "loss": 0.5916, + "step": 280 + }, + { + "epoch": 0.8062234794908062, + "grad_norm": 1.226627230644226, + "learning_rate": 2.6308366634261697e-05, + "loss": 0.5963, + "step": 285 + }, + { + "epoch": 0.8203677510608204, + "grad_norm": 1.2637792825698853, + "learning_rate": 2.6153426572719393e-05, + "loss": 0.6411, + "step": 290 + }, + { + "epoch": 0.8345120226308345, + "grad_norm": 1.1551690101623535, + "learning_rate": 2.5995778077447393e-05, + "loss": 0.5738, + "step": 295 + }, + { + "epoch": 0.8486562942008486, + "grad_norm": 1.4540832042694092, + "learning_rate": 2.5835459430896334e-05, + "loss": 0.5953, + "step": 300 + }, + { + "epoch": 0.8628005657708628, + "grad_norm": 1.2962878942489624, + "learning_rate": 2.5672509563920953e-05, + "loss": 0.6135, + "step": 305 + }, + { + "epoch": 0.8769448373408769, + "grad_norm": 1.3743832111358643, + "learning_rate": 2.5506968046326374e-05, + "loss": 0.5923, + "step": 310 + }, + { + "epoch": 0.8910891089108911, + "grad_norm": 1.1502060890197754, + "learning_rate": 2.5338875077259207e-05, + "loss": 0.6078, + "step": 315 + }, + { + "epoch": 0.9052333804809052, + "grad_norm": 1.268166184425354, + "learning_rate": 2.5168271475445795e-05, + "loss": 0.582, + "step": 320 + }, + { + "epoch": 0.9193776520509194, + "grad_norm": 1.4293423891067505, + "learning_rate": 2.499519866928006e-05, + "loss": 0.5667, + "step": 325 + }, + { + "epoch": 0.9335219236209336, + "grad_norm": 1.5681567192077637, + "learning_rate": 2.4819698686763245e-05, + "loss": 0.5708, + "step": 330 + }, + { + "epoch": 0.9476661951909476, + "grad_norm": 1.1015177965164185, + "learning_rate": 2.464181414529809e-05, + "loss": 0.5768, + "step": 335 + }, + { + "epoch": 0.9618104667609618, + "grad_norm": 1.235511302947998, + "learning_rate": 2.4461588241339877e-05, + "loss": 0.5505, + "step": 340 + }, + { + "epoch": 0.9759547383309759, + "grad_norm": 1.263378620147705, + "learning_rate": 2.4279064739906824e-05, + "loss": 0.5316, + "step": 345 + }, + { + "epoch": 0.9900990099009901, + "grad_norm": 1.3361446857452393, + "learning_rate": 2.4094287963952468e-05, + "loss": 0.5651, + "step": 350 + }, + { + "epoch": 1.002828854314003, + "grad_norm": 1.3076725006103516, + "learning_rate": 2.3907302783602522e-05, + "loss": 0.5101, + "step": 355 + }, + { + "epoch": 1.016973125884017, + "grad_norm": 1.256768822669983, + "learning_rate": 2.3718154605258885e-05, + "loss": 0.4572, + "step": 360 + }, + { + "epoch": 1.031117397454031, + "grad_norm": 1.43599271774292, + "learning_rate": 2.3526889360573387e-05, + "loss": 0.457, + "step": 365 + }, + { + "epoch": 1.0452616690240453, + "grad_norm": 1.7326899766921997, + "learning_rate": 2.3333553495294033e-05, + "loss": 0.4469, + "step": 370 + }, + { + "epoch": 1.0594059405940595, + "grad_norm": 1.4303010702133179, + "learning_rate": 2.3138193957986392e-05, + "loss": 0.4437, + "step": 375 + }, + { + "epoch": 1.0735502121640736, + "grad_norm": 1.3954081535339355, + "learning_rate": 2.2940858188632853e-05, + "loss": 0.4536, + "step": 380 + }, + { + "epoch": 1.0876944837340876, + "grad_norm": 1.3464250564575195, + "learning_rate": 2.27415941071126e-05, + "loss": 0.4048, + "step": 385 + }, + { + "epoch": 1.1018387553041018, + "grad_norm": 1.5511208772659302, + "learning_rate": 2.2540450101565002e-05, + "loss": 0.4449, + "step": 390 + }, + { + "epoch": 1.115983026874116, + "grad_norm": 1.376174807548523, + "learning_rate": 2.233747501663934e-05, + "loss": 0.4315, + "step": 395 + }, + { + "epoch": 1.1301272984441302, + "grad_norm": 1.488158106803894, + "learning_rate": 2.213271814163363e-05, + "loss": 0.4445, + "step": 400 + }, + { + "epoch": 1.1442715700141444, + "grad_norm": 1.3128489255905151, + "learning_rate": 2.192622919852551e-05, + "loss": 0.3849, + "step": 405 + }, + { + "epoch": 1.1584158415841583, + "grad_norm": 1.4015581607818604, + "learning_rate": 2.171805832989804e-05, + "loss": 0.365, + "step": 410 + }, + { + "epoch": 1.1725601131541725, + "grad_norm": 1.345054268836975, + "learning_rate": 2.1508256086763372e-05, + "loss": 0.4258, + "step": 415 + }, + { + "epoch": 1.1867043847241867, + "grad_norm": 1.3059314489364624, + "learning_rate": 2.1296873416287216e-05, + "loss": 0.4225, + "step": 420 + }, + { + "epoch": 1.200848656294201, + "grad_norm": 1.5096415281295776, + "learning_rate": 2.108396164941713e-05, + "loss": 0.4208, + "step": 425 + }, + { + "epoch": 1.214992927864215, + "grad_norm": 1.4186347723007202, + "learning_rate": 2.0869572488417626e-05, + "loss": 0.3708, + "step": 430 + }, + { + "epoch": 1.229137199434229, + "grad_norm": 1.5956618785858154, + "learning_rate": 2.0653757994315078e-05, + "loss": 0.3683, + "step": 435 + }, + { + "epoch": 1.2432814710042432, + "grad_norm": 1.3237695693969727, + "learning_rate": 2.0436570574255526e-05, + "loss": 0.3363, + "step": 440 + }, + { + "epoch": 1.2574257425742574, + "grad_norm": 1.306954264640808, + "learning_rate": 2.0218062968778407e-05, + "loss": 0.3775, + "step": 445 + }, + { + "epoch": 1.2715700141442716, + "grad_norm": 1.4159201383590698, + "learning_rate": 1.999828823900939e-05, + "loss": 0.3793, + "step": 450 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.6197909116744995, + "learning_rate": 1.977729975377527e-05, + "loss": 0.3646, + "step": 455 + }, + { + "epoch": 1.2998585572842998, + "grad_norm": 1.3110264539718628, + "learning_rate": 1.9555151176644223e-05, + "loss": 0.348, + "step": 460 + }, + { + "epoch": 1.314002828854314, + "grad_norm": 1.4901666641235352, + "learning_rate": 1.933189645289445e-05, + "loss": 0.344, + "step": 465 + }, + { + "epoch": 1.3281471004243282, + "grad_norm": 1.463713526725769, + "learning_rate": 1.91075897964144e-05, + "loss": 0.3608, + "step": 470 + }, + { + "epoch": 1.3422913719943423, + "grad_norm": 1.430551528930664, + "learning_rate": 1.888228567653781e-05, + "loss": 0.333, + "step": 475 + }, + { + "epoch": 1.3564356435643563, + "grad_norm": 1.4223859310150146, + "learning_rate": 1.8656038804816662e-05, + "loss": 0.3426, + "step": 480 + }, + { + "epoch": 1.3705799151343705, + "grad_norm": 1.312911868095398, + "learning_rate": 1.8428904121735345e-05, + "loss": 0.3521, + "step": 485 + }, + { + "epoch": 1.3847241867043847, + "grad_norm": 1.7429078817367554, + "learning_rate": 1.8200936783369252e-05, + "loss": 0.2954, + "step": 490 + }, + { + "epoch": 1.3988684582743989, + "grad_norm": 1.164272665977478, + "learning_rate": 1.7972192147990966e-05, + "loss": 0.295, + "step": 495 + }, + { + "epoch": 1.413012729844413, + "grad_norm": 2.0686724185943604, + "learning_rate": 1.7742725762627395e-05, + "loss": 0.3072, + "step": 500 + }, + { + "epoch": 1.427157001414427, + "grad_norm": 1.4997198581695557, + "learning_rate": 1.7512593349571046e-05, + "loss": 0.301, + "step": 505 + }, + { + "epoch": 1.4413012729844414, + "grad_norm": 1.337180495262146, + "learning_rate": 1.7281850792848752e-05, + "loss": 0.3018, + "step": 510 + }, + { + "epoch": 1.4554455445544554, + "grad_norm": 1.236642837524414, + "learning_rate": 1.7050554124651103e-05, + "loss": 0.2737, + "step": 515 + }, + { + "epoch": 1.4695898161244696, + "grad_norm": 1.2961151599884033, + "learning_rate": 1.6818759511725922e-05, + "loss": 0.2932, + "step": 520 + }, + { + "epoch": 1.4837340876944838, + "grad_norm": 1.1835635900497437, + "learning_rate": 1.658652324173907e-05, + "loss": 0.2632, + "step": 525 + }, + { + "epoch": 1.4978783592644977, + "grad_norm": 1.386008620262146, + "learning_rate": 1.6353901709605838e-05, + "loss": 0.2427, + "step": 530 + }, + { + "epoch": 1.5120226308345122, + "grad_norm": 1.3085600137710571, + "learning_rate": 1.6120951403796367e-05, + "loss": 0.2665, + "step": 535 + }, + { + "epoch": 1.5261669024045261, + "grad_norm": 1.4209017753601074, + "learning_rate": 1.5887728892618292e-05, + "loss": 0.2482, + "step": 540 + }, + { + "epoch": 1.5403111739745403, + "grad_norm": 1.3191282749176025, + "learning_rate": 1.5654290810480043e-05, + "loss": 0.256, + "step": 545 + }, + { + "epoch": 1.5544554455445545, + "grad_norm": 1.51828134059906, + "learning_rate": 1.5420693844138036e-05, + "loss": 0.2805, + "step": 550 + }, + { + "epoch": 1.5685997171145685, + "grad_norm": 1.4244929552078247, + "learning_rate": 1.5186994718931227e-05, + "loss": 0.2693, + "step": 555 + }, + { + "epoch": 1.5827439886845829, + "grad_norm": 1.5726312398910522, + "learning_rate": 1.4953250185006236e-05, + "loss": 0.2441, + "step": 560 + }, + { + "epoch": 1.5968882602545968, + "grad_norm": 1.4098913669586182, + "learning_rate": 1.471951700353647e-05, + "loss": 0.265, + "step": 565 + }, + { + "epoch": 1.611032531824611, + "grad_norm": 1.3976566791534424, + "learning_rate": 1.4485851932938575e-05, + "loss": 0.2747, + "step": 570 + }, + { + "epoch": 1.6251768033946252, + "grad_norm": 1.4219110012054443, + "learning_rate": 1.4252311715089542e-05, + "loss": 0.2712, + "step": 575 + }, + { + "epoch": 1.6393210749646392, + "grad_norm": 1.2495921850204468, + "learning_rate": 1.4018953061547854e-05, + "loss": 0.2642, + "step": 580 + }, + { + "epoch": 1.6534653465346536, + "grad_norm": 1.2345384359359741, + "learning_rate": 1.3785832639781951e-05, + "loss": 0.2385, + "step": 585 + }, + { + "epoch": 1.6676096181046676, + "grad_norm": 1.2338228225708008, + "learning_rate": 1.355300705940945e-05, + "loss": 0.2414, + "step": 590 + }, + { + "epoch": 1.6817538896746818, + "grad_norm": 1.224704623222351, + "learning_rate": 1.3320532858450382e-05, + "loss": 0.2353, + "step": 595 + }, + { + "epoch": 1.695898161244696, + "grad_norm": 1.2852225303649902, + "learning_rate": 1.3088466489597838e-05, + "loss": 0.2467, + "step": 600 + }, + { + "epoch": 1.71004243281471, + "grad_norm": 1.4443373680114746, + "learning_rate": 1.2856864306509303e-05, + "loss": 0.2566, + "step": 605 + }, + { + "epoch": 1.7241867043847243, + "grad_norm": 1.5311261415481567, + "learning_rate": 1.2625782550122076e-05, + "loss": 0.2202, + "step": 610 + }, + { + "epoch": 1.7383309759547383, + "grad_norm": 1.2267022132873535, + "learning_rate": 1.2395277334996045e-05, + "loss": 0.2199, + "step": 615 + }, + { + "epoch": 1.7524752475247525, + "grad_norm": 1.4180679321289062, + "learning_rate": 1.2165404635687139e-05, + "loss": 0.2255, + "step": 620 + }, + { + "epoch": 1.7666195190947667, + "grad_norm": 1.7082161903381348, + "learning_rate": 1.1936220273154797e-05, + "loss": 0.2395, + "step": 625 + }, + { + "epoch": 1.7807637906647806, + "grad_norm": 1.2529240846633911, + "learning_rate": 1.1707779901206733e-05, + "loss": 0.2108, + "step": 630 + }, + { + "epoch": 1.794908062234795, + "grad_norm": 1.2624605894088745, + "learning_rate": 1.1480138992984276e-05, + "loss": 0.2087, + "step": 635 + }, + { + "epoch": 1.809052333804809, + "grad_norm": 1.320502758026123, + "learning_rate": 1.1253352827491594e-05, + "loss": 0.1849, + "step": 640 + }, + { + "epoch": 1.8231966053748232, + "grad_norm": 1.3565486669540405, + "learning_rate": 1.102747647617209e-05, + "loss": 0.1978, + "step": 645 + }, + { + "epoch": 1.8373408769448374, + "grad_norm": 1.991613745689392, + "learning_rate": 1.080256478953512e-05, + "loss": 0.2326, + "step": 650 + }, + { + "epoch": 1.8514851485148514, + "grad_norm": 1.6479405164718628, + "learning_rate": 1.0578672383836437e-05, + "loss": 0.1628, + "step": 655 + }, + { + "epoch": 1.8656294200848658, + "grad_norm": 1.2765008211135864, + "learning_rate": 1.0355853627815506e-05, + "loss": 0.2066, + "step": 660 + }, + { + "epoch": 1.8797736916548797, + "grad_norm": 1.3529490232467651, + "learning_rate": 1.0134162629492895e-05, + "loss": 0.2026, + "step": 665 + }, + { + "epoch": 1.893917963224894, + "grad_norm": 1.5032598972320557, + "learning_rate": 9.913653223031e-06, + "loss": 0.2238, + "step": 670 + }, + { + "epoch": 1.908062234794908, + "grad_norm": 1.1178032159805298, + "learning_rate": 9.69437895566128e-06, + "loss": 0.1902, + "step": 675 + }, + { + "epoch": 1.922206506364922, + "grad_norm": 1.5137513875961304, + "learning_rate": 9.476393074681134e-06, + "loss": 0.1972, + "step": 680 + }, + { + "epoch": 1.9363507779349365, + "grad_norm": 1.278634786605835, + "learning_rate": 9.259748514523654e-06, + "loss": 0.1515, + "step": 685 + }, + { + "epoch": 1.9504950495049505, + "grad_norm": 1.298923134803772, + "learning_rate": 9.044497883903326e-06, + "loss": 0.1746, + "step": 690 + }, + { + "epoch": 1.9646393210749646, + "grad_norm": 1.2538295984268188, + "learning_rate": 8.83069345304083e-06, + "loss": 0.1822, + "step": 695 + }, + { + "epoch": 1.9787835926449788, + "grad_norm": 1.3477420806884766, + "learning_rate": 8.618387140970047e-06, + "loss": 0.2134, + "step": 700 + }, + { + "epoch": 1.9929278642149928, + "grad_norm": 1.438564419746399, + "learning_rate": 8.407630502930325e-06, + "loss": 0.1697, + "step": 705 + }, + { + "epoch": 2.005657708628006, + "grad_norm": 1.1922780275344849, + "learning_rate": 8.198474717847146e-06, + "loss": 0.1593, + "step": 710 + }, + { + "epoch": 2.01980198019802, + "grad_norm": 1.2811698913574219, + "learning_rate": 7.99097057590407e-06, + "loss": 0.152, + "step": 715 + }, + { + "epoch": 2.033946251768034, + "grad_norm": 1.2099448442459106, + "learning_rate": 7.785168466209188e-06, + "loss": 0.1427, + "step": 720 + }, + { + "epoch": 2.048090523338048, + "grad_norm": 1.163729190826416, + "learning_rate": 7.581118364558889e-06, + "loss": 0.1434, + "step": 725 + }, + { + "epoch": 2.062234794908062, + "grad_norm": 1.1432162523269653, + "learning_rate": 7.378869821302062e-06, + "loss": 0.1294, + "step": 730 + }, + { + "epoch": 2.0763790664780766, + "grad_norm": 1.1454987525939941, + "learning_rate": 7.17847194930753e-06, + "loss": 0.1384, + "step": 735 + }, + { + "epoch": 2.0905233380480905, + "grad_norm": 1.121763825416565, + "learning_rate": 6.9799734120378105e-06, + "loss": 0.1348, + "step": 740 + }, + { + "epoch": 2.1046676096181045, + "grad_norm": 1.1021969318389893, + "learning_rate": 6.783422411731932e-06, + "loss": 0.1375, + "step": 745 + }, + { + "epoch": 2.118811881188119, + "grad_norm": 1.2024462223052979, + "learning_rate": 6.58886667770028e-06, + "loss": 0.1483, + "step": 750 + }, + { + "epoch": 2.132956152758133, + "grad_norm": 0.9385703206062317, + "learning_rate": 6.3963534547343126e-06, + "loss": 0.1241, + "step": 755 + }, + { + "epoch": 2.1471004243281473, + "grad_norm": 1.0785479545593262, + "learning_rate": 6.205929491633869e-06, + "loss": 0.1417, + "step": 760 + }, + { + "epoch": 2.1612446958981613, + "grad_norm": 1.29376220703125, + "learning_rate": 6.017641029854996e-06, + "loss": 0.1263, + "step": 765 + }, + { + "epoch": 2.1753889674681752, + "grad_norm": 1.1285709142684937, + "learning_rate": 5.831533792280926e-06, + "loss": 0.125, + "step": 770 + }, + { + "epoch": 2.1895332390381896, + "grad_norm": 1.1961309909820557, + "learning_rate": 5.647652972118998e-06, + "loss": 0.1264, + "step": 775 + }, + { + "epoch": 2.2036775106082036, + "grad_norm": 0.9729381203651428, + "learning_rate": 5.46604322192618e-06, + "loss": 0.1287, + "step": 780 + }, + { + "epoch": 2.217821782178218, + "grad_norm": 1.1855789422988892, + "learning_rate": 5.286748642765945e-06, + "loss": 0.1313, + "step": 785 + }, + { + "epoch": 2.231966053748232, + "grad_norm": 1.0405877828598022, + "learning_rate": 5.109812773498968e-06, + "loss": 0.1278, + "step": 790 + }, + { + "epoch": 2.246110325318246, + "grad_norm": 1.0911056995391846, + "learning_rate": 4.935278580210451e-06, + "loss": 0.1221, + "step": 795 + }, + { + "epoch": 2.2602545968882604, + "grad_norm": 0.9496909976005554, + "learning_rate": 4.763188445776447e-06, + "loss": 0.1248, + "step": 800 + }, + { + "epoch": 2.2743988684582743, + "grad_norm": 0.8919278383255005, + "learning_rate": 4.593584159571875e-06, + "loss": 0.123, + "step": 805 + }, + { + "epoch": 2.2885431400282887, + "grad_norm": 0.8893589973449707, + "learning_rate": 4.426506907322624e-06, + "loss": 0.0992, + "step": 810 + }, + { + "epoch": 2.3026874115983027, + "grad_norm": 0.9838733673095703, + "learning_rate": 4.261997261104223e-06, + "loss": 0.1172, + "step": 815 + }, + { + "epoch": 2.3168316831683167, + "grad_norm": 1.0234827995300293, + "learning_rate": 4.100095169489597e-06, + "loss": 0.1239, + "step": 820 + }, + { + "epoch": 2.330975954738331, + "grad_norm": 0.8916055560112, + "learning_rate": 3.940839947848141e-06, + "loss": 0.1133, + "step": 825 + }, + { + "epoch": 2.345120226308345, + "grad_norm": 0.9613996148109436, + "learning_rate": 3.7842702687986374e-06, + "loss": 0.1236, + "step": 830 + }, + { + "epoch": 2.3592644978783595, + "grad_norm": 1.1014704704284668, + "learning_rate": 3.630424152818203e-06, + "loss": 0.1164, + "step": 835 + }, + { + "epoch": 2.3734087694483734, + "grad_norm": 0.9858735203742981, + "learning_rate": 3.479338959009625e-06, + "loss": 0.1046, + "step": 840 + }, + { + "epoch": 2.3875530410183874, + "grad_norm": 0.9532977342605591, + "learning_rate": 3.331051376029279e-06, + "loss": 0.118, + "step": 845 + }, + { + "epoch": 2.401697312588402, + "grad_norm": 1.0921580791473389, + "learning_rate": 3.1855974131778696e-06, + "loss": 0.1135, + "step": 850 + }, + { + "epoch": 2.4158415841584158, + "grad_norm": 1.033462405204773, + "learning_rate": 3.0430123916561677e-06, + "loss": 0.1043, + "step": 855 + }, + { + "epoch": 2.42998585572843, + "grad_norm": 1.0211292505264282, + "learning_rate": 2.90333093598776e-06, + "loss": 0.1152, + "step": 860 + }, + { + "epoch": 2.444130127298444, + "grad_norm": 0.9785246253013611, + "learning_rate": 2.7665869656110974e-06, + "loss": 0.1203, + "step": 865 + }, + { + "epoch": 2.458274398868458, + "grad_norm": 1.0705971717834473, + "learning_rate": 2.6328136866426294e-06, + "loss": 0.1158, + "step": 870 + }, + { + "epoch": 2.4724186704384725, + "grad_norm": 0.9838902354240417, + "learning_rate": 2.502043583813268e-06, + "loss": 0.1145, + "step": 875 + }, + { + "epoch": 2.4865629420084865, + "grad_norm": 0.9267615079879761, + "learning_rate": 2.3743084125799515e-06, + "loss": 0.116, + "step": 880 + }, + { + "epoch": 2.500707213578501, + "grad_norm": 1.0617531538009644, + "learning_rate": 2.2496391914143634e-06, + "loss": 0.115, + "step": 885 + }, + { + "epoch": 2.514851485148515, + "grad_norm": 0.7634238004684448, + "learning_rate": 2.1280661942705554e-06, + "loss": 0.1108, + "step": 890 + }, + { + "epoch": 2.528995756718529, + "grad_norm": 0.9655850529670715, + "learning_rate": 2.0096189432334194e-06, + "loss": 0.1194, + "step": 895 + }, + { + "epoch": 2.5431400282885432, + "grad_norm": 0.6987083554267883, + "learning_rate": 1.8943262013497082e-06, + "loss": 0.1045, + "step": 900 + }, + { + "epoch": 2.557284299858557, + "grad_norm": 0.9216400384902954, + "learning_rate": 1.782215965643364e-06, + "loss": 0.1056, + "step": 905 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 0.8495373725891113, + "learning_rate": 1.6733154603169177e-06, + "loss": 0.1002, + "step": 910 + }, + { + "epoch": 2.5855728429985856, + "grad_norm": 0.872205376625061, + "learning_rate": 1.5676511301404861e-06, + "loss": 0.1061, + "step": 915 + }, + { + "epoch": 2.5997171145685996, + "grad_norm": 1.0389809608459473, + "learning_rate": 1.465248634030103e-06, + "loss": 0.1064, + "step": 920 + }, + { + "epoch": 2.613861386138614, + "grad_norm": 0.876620888710022, + "learning_rate": 1.3661328388168358e-06, + "loss": 0.1031, + "step": 925 + }, + { + "epoch": 2.628005657708628, + "grad_norm": 1.1367650032043457, + "learning_rate": 1.2703278132082936e-06, + "loss": 0.1017, + "step": 930 + }, + { + "epoch": 2.6421499292786423, + "grad_norm": 0.9264767169952393, + "learning_rate": 1.177856821943884e-06, + "loss": 0.1119, + "step": 935 + }, + { + "epoch": 2.6562942008486563, + "grad_norm": 0.79477858543396, + "learning_rate": 1.0887423201453778e-06, + "loss": 0.1088, + "step": 940 + }, + { + "epoch": 2.6704384724186703, + "grad_norm": 0.8350077867507935, + "learning_rate": 1.0030059478640025e-06, + "loss": 0.089, + "step": 945 + }, + { + "epoch": 2.6845827439886847, + "grad_norm": 0.8641753792762756, + "learning_rate": 9.206685248255248e-07, + "loss": 0.1015, + "step": 950 + }, + { + "epoch": 2.6987270155586986, + "grad_norm": 0.9205171465873718, + "learning_rate": 8.417500453744864e-07, + "loss": 0.1081, + "step": 955 + }, + { + "epoch": 2.7128712871287126, + "grad_norm": 0.9577580094337463, + "learning_rate": 7.662696736189129e-07, + "loss": 0.1047, + "step": 960 + }, + { + "epoch": 2.727015558698727, + "grad_norm": 0.7271941900253296, + "learning_rate": 6.942457387765977e-07, + "loss": 0.0998, + "step": 965 + }, + { + "epoch": 2.741159830268741, + "grad_norm": 0.7201496958732605, + "learning_rate": 6.256957307241396e-07, + "loss": 0.1073, + "step": 970 + }, + { + "epoch": 2.7553041018387554, + "grad_norm": 0.9902117252349854, + "learning_rate": 5.606362957498195e-07, + "loss": 0.0948, + "step": 975 + }, + { + "epoch": 2.7694483734087694, + "grad_norm": 0.7257112264633179, + "learning_rate": 4.990832325112898e-07, + "loss": 0.1071, + "step": 980 + }, + { + "epoch": 2.783592644978784, + "grad_norm": 0.8262088298797607, + "learning_rate": 4.4105148819913564e-07, + "loss": 0.0948, + "step": 985 + }, + { + "epoch": 2.7977369165487977, + "grad_norm": 0.765438437461853, + "learning_rate": 3.865551549071772e-07, + "loss": 0.1064, + "step": 990 + }, + { + "epoch": 2.8118811881188117, + "grad_norm": 0.9083518981933594, + "learning_rate": 3.3560746621043193e-07, + "loss": 0.0925, + "step": 995 + }, + { + "epoch": 2.826025459688826, + "grad_norm": 0.7244449853897095, + "learning_rate": 2.8822079395154357e-07, + "loss": 0.0958, + "step": 1000 + }, + { + "epoch": 2.84016973125884, + "grad_norm": 0.7777174711227417, + "learning_rate": 2.4440664523648017e-07, + "loss": 0.074, + "step": 1005 + }, + { + "epoch": 2.854314002828854, + "grad_norm": 0.8775291442871094, + "learning_rate": 2.041756596402161e-07, + "loss": 0.1014, + "step": 1010 + }, + { + "epoch": 2.8684582743988685, + "grad_norm": 0.6547220945358276, + "learning_rate": 1.6753760662307217e-07, + "loss": 0.0907, + "step": 1015 + }, + { + "epoch": 2.882602545968883, + "grad_norm": 0.7218748927116394, + "learning_rate": 1.3450138315836303e-07, + "loss": 0.093, + "step": 1020 + }, + { + "epoch": 2.896746817538897, + "grad_norm": 0.8639464974403381, + "learning_rate": 1.050750115719057e-07, + "loss": 0.0937, + "step": 1025 + }, + { + "epoch": 2.910891089108911, + "grad_norm": 0.8950945734977722, + "learning_rate": 7.926563759392436e-08, + "loss": 0.0829, + "step": 1030 + }, + { + "epoch": 2.9250353606789252, + "grad_norm": 0.803298830986023, + "learning_rate": 5.7079528623816824e-08, + "loss": 0.1001, + "step": 1035 + }, + { + "epoch": 2.939179632248939, + "grad_norm": 0.80543053150177, + "learning_rate": 3.85220722082269e-08, + "loss": 0.1089, + "step": 1040 + }, + { + "epoch": 2.953323903818953, + "grad_norm": 0.8624736666679382, + "learning_rate": 2.3597774732750932e-08, + "loss": 0.1131, + "step": 1045 + }, + { + "epoch": 2.9674681753889676, + "grad_norm": 0.7844550609588623, + "learning_rate": 1.2310260327646483e-08, + "loss": 0.0839, + "step": 1050 + }, + { + "epoch": 2.9816124469589815, + "grad_norm": 0.7018393874168396, + "learning_rate": 4.662269987756318e-09, + "loss": 0.094, + "step": 1055 + }, + { + "epoch": 2.9957567185289955, + "grad_norm": 0.7810143828392029, + "learning_rate": 6.556609069091613e-10, + "loss": 0.1014, + "step": 1060 + }, + { + "epoch": 3.0, + "step": 1062, + "total_flos": 1.3289249601686077e+18, + "train_loss": 0.4596675281218216, + "train_runtime": 1048.95, + "train_samples_per_second": 32.338, + "train_steps_per_second": 1.012 + } + ], + "logging_steps": 5, + "max_steps": 1062, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3289249601686077e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..deb1ff390de3724007cb7e121b2645fa69b6f043 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/15_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82e5d59c582396d1aa2a7bdbee4da887bce1dc66b98f499d1e62730826f955d +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..76b86e3e7816c81835902b6d5c3c2d5398c6e6b0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 16_128_e3_3e-5 + results: [] +--- + + + +# 16_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0f063125823d63eebea8d6136ec417c994ada67 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac0f3959fb227201b0a160c3c873c78468a43bf7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844e0814e09e27de6214db43075642749820964a8ed8c9f22059923136257e13 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..71013676c472e43081fdf3799f3d1b6d34a87ab7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4623446908954214e+18, + "train_loss": 0.4343231242635976, + "train_runtime": 1174.5204, + "train_samples": 12500, + "train_samples_per_second": 31.928, + "train_steps_per_second": 0.999 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..71013676c472e43081fdf3799f3d1b6d34a87ab7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4623446908954214e+18, + "train_loss": 0.4343231242635976, + "train_runtime": 1174.5204, + "train_samples": 12500, + "train_samples_per_second": 31.928, + "train_steps_per_second": 0.999 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1c686d48971f105fde3737c7a21d53fe4dc518b8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1681 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1173, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012795905310300703, + "grad_norm": 0.6061513423919678, + "learning_rate": 2.033898305084746e-06, + "loss": 1.5942, + "step": 5 + }, + { + "epoch": 0.025591810620601407, + "grad_norm": 0.6326838135719299, + "learning_rate": 4.576271186440678e-06, + "loss": 1.5748, + "step": 10 + }, + { + "epoch": 0.03838771593090211, + "grad_norm": 0.6796743273735046, + "learning_rate": 7.1186440677966106e-06, + "loss": 1.5437, + "step": 15 + }, + { + "epoch": 0.05118362124120281, + "grad_norm": 0.5158414244651794, + "learning_rate": 9.661016949152542e-06, + "loss": 1.5898, + "step": 20 + }, + { + "epoch": 0.06397952655150352, + "grad_norm": 0.5181837677955627, + "learning_rate": 1.2203389830508475e-05, + "loss": 1.5508, + "step": 25 + }, + { + "epoch": 0.07677543186180422, + "grad_norm": 0.4937606751918793, + "learning_rate": 1.4745762711864408e-05, + "loss": 1.4278, + "step": 30 + }, + { + "epoch": 0.08957133717210493, + "grad_norm": 0.44417810440063477, + "learning_rate": 1.728813559322034e-05, + "loss": 1.5198, + "step": 35 + }, + { + "epoch": 0.10236724248240563, + "grad_norm": 0.529248058795929, + "learning_rate": 1.983050847457627e-05, + "loss": 1.4627, + "step": 40 + }, + { + "epoch": 0.11516314779270634, + "grad_norm": 0.5200732946395874, + "learning_rate": 2.2372881355932205e-05, + "loss": 1.4587, + "step": 45 + }, + { + "epoch": 0.12795905310300704, + "grad_norm": 0.5085671544075012, + "learning_rate": 2.4915254237288138e-05, + "loss": 1.3774, + "step": 50 + }, + { + "epoch": 0.14075495841330773, + "grad_norm": 0.5274409055709839, + "learning_rate": 2.7457627118644068e-05, + "loss": 1.4352, + "step": 55 + }, + { + "epoch": 0.15355086372360843, + "grad_norm": 0.6060143709182739, + "learning_rate": 3e-05, + "loss": 1.4025, + "step": 60 + }, + { + "epoch": 0.16634676903390916, + "grad_norm": 0.6283566951751709, + "learning_rate": 2.999850884276484e-05, + "loss": 1.3898, + "step": 65 + }, + { + "epoch": 0.17914267434420986, + "grad_norm": 0.8579778671264648, + "learning_rate": 2.999403566753267e-05, + "loss": 1.267, + "step": 70 + }, + { + "epoch": 0.19193857965451055, + "grad_norm": 0.5817264914512634, + "learning_rate": 2.9986581363664512e-05, + "loss": 1.3713, + "step": 75 + }, + { + "epoch": 0.20473448496481125, + "grad_norm": 0.694118857383728, + "learning_rate": 2.997614741323225e-05, + "loss": 1.3174, + "step": 80 + }, + { + "epoch": 0.21753039027511195, + "grad_norm": 0.6447925567626953, + "learning_rate": 2.9962735890723977e-05, + "loss": 1.1891, + "step": 85 + }, + { + "epoch": 0.23032629558541268, + "grad_norm": 0.7169050574302673, + "learning_rate": 2.994634946263153e-05, + "loss": 1.2038, + "step": 90 + }, + { + "epoch": 0.24312220089571338, + "grad_norm": 0.7172923684120178, + "learning_rate": 2.9926991386920353e-05, + "loss": 1.2112, + "step": 95 + }, + { + "epoch": 0.2559181062060141, + "grad_norm": 0.6412022113800049, + "learning_rate": 2.9904665512381735e-05, + "loss": 1.2261, + "step": 100 + }, + { + "epoch": 0.2687140115163148, + "grad_norm": 0.7059776782989502, + "learning_rate": 2.987937627786759e-05, + "loss": 1.1538, + "step": 105 + }, + { + "epoch": 0.28150991682661547, + "grad_norm": 0.7091619968414307, + "learning_rate": 2.985112871140792e-05, + "loss": 1.1509, + "step": 110 + }, + { + "epoch": 0.2943058221369162, + "grad_norm": 0.801896333694458, + "learning_rate": 2.9819928429211133e-05, + "loss": 1.0751, + "step": 115 + }, + { + "epoch": 0.30710172744721687, + "grad_norm": 0.9150415658950806, + "learning_rate": 2.9785781634547438e-05, + "loss": 1.1506, + "step": 120 + }, + { + "epoch": 0.3198976327575176, + "grad_norm": 0.8006790280342102, + "learning_rate": 2.9748695116515496e-05, + "loss": 1.163, + "step": 125 + }, + { + "epoch": 0.3326935380678183, + "grad_norm": 0.8102880716323853, + "learning_rate": 2.970867624869259e-05, + "loss": 1.1156, + "step": 130 + }, + { + "epoch": 0.345489443378119, + "grad_norm": 0.9389704465866089, + "learning_rate": 2.9665732987668633e-05, + "loss": 1.0077, + "step": 135 + }, + { + "epoch": 0.3582853486884197, + "grad_norm": 0.8056272268295288, + "learning_rate": 2.9619873871464203e-05, + "loss": 1.0044, + "step": 140 + }, + { + "epoch": 0.3710812539987204, + "grad_norm": 0.9529396295547485, + "learning_rate": 2.957110801783303e-05, + "loss": 0.9989, + "step": 145 + }, + { + "epoch": 0.3838771593090211, + "grad_norm": 1.1513861417770386, + "learning_rate": 2.9519445122449174e-05, + "loss": 1.0216, + "step": 150 + }, + { + "epoch": 0.39667306461932184, + "grad_norm": 0.9237390756607056, + "learning_rate": 2.946489545697933e-05, + "loss": 0.9888, + "step": 155 + }, + { + "epoch": 0.4094689699296225, + "grad_norm": 1.113563060760498, + "learning_rate": 2.9407469867040615e-05, + "loss": 0.9581, + "step": 160 + }, + { + "epoch": 0.42226487523992323, + "grad_norm": 0.9809014201164246, + "learning_rate": 2.9347179770044217e-05, + "loss": 0.9358, + "step": 165 + }, + { + "epoch": 0.4350607805502239, + "grad_norm": 1.0302739143371582, + "learning_rate": 2.928403715292538e-05, + "loss": 0.8813, + "step": 170 + }, + { + "epoch": 0.44785668586052463, + "grad_norm": 0.9814358353614807, + "learning_rate": 2.921805456976016e-05, + "loss": 0.9343, + "step": 175 + }, + { + "epoch": 0.46065259117082535, + "grad_norm": 1.0007917881011963, + "learning_rate": 2.914924513926938e-05, + "loss": 0.8862, + "step": 180 + }, + { + "epoch": 0.473448496481126, + "grad_norm": 1.1029523611068726, + "learning_rate": 2.9077622542210405e-05, + "loss": 0.9189, + "step": 185 + }, + { + "epoch": 0.48624440179142675, + "grad_norm": 2.507399797439575, + "learning_rate": 2.9003201018657063e-05, + "loss": 0.869, + "step": 190 + }, + { + "epoch": 0.4990403071017274, + "grad_norm": 1.0367331504821777, + "learning_rate": 2.8925995365168474e-05, + "loss": 0.8484, + "step": 195 + }, + { + "epoch": 0.5118362124120281, + "grad_norm": 1.08120596408844, + "learning_rate": 2.8846020931847138e-05, + "loss": 0.8554, + "step": 200 + }, + { + "epoch": 0.5246321177223289, + "grad_norm": 1.0828542709350586, + "learning_rate": 2.8763293619287032e-05, + "loss": 0.8488, + "step": 205 + }, + { + "epoch": 0.5374280230326296, + "grad_norm": 1.1841707229614258, + "learning_rate": 2.867782987541225e-05, + "loss": 0.8786, + "step": 210 + }, + { + "epoch": 0.5502239283429302, + "grad_norm": 1.2651773691177368, + "learning_rate": 2.85896466922068e-05, + "loss": 0.7643, + "step": 215 + }, + { + "epoch": 0.5630198336532309, + "grad_norm": 1.1189494132995605, + "learning_rate": 2.849876160233623e-05, + "loss": 0.7748, + "step": 220 + }, + { + "epoch": 0.5758157389635317, + "grad_norm": 1.0990790128707886, + "learning_rate": 2.8405192675661782e-05, + "loss": 0.8274, + "step": 225 + }, + { + "epoch": 0.5886116442738324, + "grad_norm": 1.0771714448928833, + "learning_rate": 2.830895851564773e-05, + "loss": 0.7577, + "step": 230 + }, + { + "epoch": 0.6014075495841331, + "grad_norm": 1.1330010890960693, + "learning_rate": 2.82100782556626e-05, + "loss": 0.7859, + "step": 235 + }, + { + "epoch": 0.6142034548944337, + "grad_norm": 1.1280107498168945, + "learning_rate": 2.810857155517507e-05, + "loss": 0.7318, + "step": 240 + }, + { + "epoch": 0.6269993602047345, + "grad_norm": 1.302139163017273, + "learning_rate": 2.8004458595845253e-05, + "loss": 0.7051, + "step": 245 + }, + { + "epoch": 0.6397952655150352, + "grad_norm": 1.0455633401870728, + "learning_rate": 2.789776007751216e-05, + "loss": 0.7231, + "step": 250 + }, + { + "epoch": 0.6525911708253359, + "grad_norm": 1.2827589511871338, + "learning_rate": 2.778849721407814e-05, + "loss": 0.6457, + "step": 255 + }, + { + "epoch": 0.6653870761356366, + "grad_norm": 1.5184168815612793, + "learning_rate": 2.7676691729291103e-05, + "loss": 0.6564, + "step": 260 + }, + { + "epoch": 0.6781829814459372, + "grad_norm": 1.265804409980774, + "learning_rate": 2.756236585242539e-05, + "loss": 0.6682, + "step": 265 + }, + { + "epoch": 0.690978886756238, + "grad_norm": 1.1942700147628784, + "learning_rate": 2.744554231386213e-05, + "loss": 0.7241, + "step": 270 + }, + { + "epoch": 0.7037747920665387, + "grad_norm": 1.0899379253387451, + "learning_rate": 2.732624434056996e-05, + "loss": 0.6734, + "step": 275 + }, + { + "epoch": 0.7165706973768394, + "grad_norm": 1.2990151643753052, + "learning_rate": 2.720449565148701e-05, + "loss": 0.685, + "step": 280 + }, + { + "epoch": 0.7293666026871402, + "grad_norm": 1.252709984779358, + "learning_rate": 2.70803204528051e-05, + "loss": 0.7005, + "step": 285 + }, + { + "epoch": 0.7421625079974408, + "grad_norm": 1.2275372743606567, + "learning_rate": 2.695374343315702e-05, + "loss": 0.552, + "step": 290 + }, + { + "epoch": 0.7549584133077415, + "grad_norm": 1.3584619760513306, + "learning_rate": 2.6824789758707913e-05, + "loss": 0.6413, + "step": 295 + }, + { + "epoch": 0.7677543186180422, + "grad_norm": 1.1666145324707031, + "learning_rate": 2.6693485068151756e-05, + "loss": 0.6745, + "step": 300 + }, + { + "epoch": 0.780550223928343, + "grad_norm": 1.266880750656128, + "learning_rate": 2.6559855467613774e-05, + "loss": 0.5962, + "step": 305 + }, + { + "epoch": 0.7933461292386437, + "grad_norm": 1.1046452522277832, + "learning_rate": 2.6423927525460067e-05, + "loss": 0.6202, + "step": 310 + }, + { + "epoch": 0.8061420345489443, + "grad_norm": 1.2934908866882324, + "learning_rate": 2.6285728267015212e-05, + "loss": 0.5877, + "step": 315 + }, + { + "epoch": 0.818937939859245, + "grad_norm": 1.2493360042572021, + "learning_rate": 2.6145285169189106e-05, + "loss": 0.5432, + "step": 320 + }, + { + "epoch": 0.8317338451695457, + "grad_norm": 1.4390218257904053, + "learning_rate": 2.600262615501393e-05, + "loss": 0.6155, + "step": 325 + }, + { + "epoch": 0.8445297504798465, + "grad_norm": 1.2509618997573853, + "learning_rate": 2.5857779588092513e-05, + "loss": 0.5914, + "step": 330 + }, + { + "epoch": 0.8573256557901472, + "grad_norm": 1.2322059869766235, + "learning_rate": 2.5710774266959015e-05, + "loss": 0.5094, + "step": 335 + }, + { + "epoch": 0.8701215611004478, + "grad_norm": 1.5432853698730469, + "learning_rate": 2.55616394193532e-05, + "loss": 0.5541, + "step": 340 + }, + { + "epoch": 0.8829174664107485, + "grad_norm": 1.2042971849441528, + "learning_rate": 2.541040469640934e-05, + "loss": 0.5673, + "step": 345 + }, + { + "epoch": 0.8957133717210493, + "grad_norm": 1.3932462930679321, + "learning_rate": 2.5257100166760942e-05, + "loss": 0.5216, + "step": 350 + }, + { + "epoch": 0.90850927703135, + "grad_norm": 1.1966464519500732, + "learning_rate": 2.5101756310562493e-05, + "loss": 0.4703, + "step": 355 + }, + { + "epoch": 0.9213051823416507, + "grad_norm": 1.2079477310180664, + "learning_rate": 2.4944404013429323e-05, + "loss": 0.5386, + "step": 360 + }, + { + "epoch": 0.9341010876519513, + "grad_norm": 1.1918399333953857, + "learning_rate": 2.4785074560296953e-05, + "loss": 0.4892, + "step": 365 + }, + { + "epoch": 0.946896992962252, + "grad_norm": 1.3360567092895508, + "learning_rate": 2.462379962920096e-05, + "loss": 0.5367, + "step": 370 + }, + { + "epoch": 0.9596928982725528, + "grad_norm": 1.2915703058242798, + "learning_rate": 2.446061128497872e-05, + "loss": 0.5253, + "step": 375 + }, + { + "epoch": 0.9724888035828535, + "grad_norm": 1.2470221519470215, + "learning_rate": 2.429554197289426e-05, + "loss": 0.5298, + "step": 380 + }, + { + "epoch": 0.9852847088931542, + "grad_norm": 1.3628243207931519, + "learning_rate": 2.4128624512187444e-05, + "loss": 0.4539, + "step": 385 + }, + { + "epoch": 0.9980806142034548, + "grad_norm": 1.2769203186035156, + "learning_rate": 2.3959892089548844e-05, + "loss": 0.5826, + "step": 390 + }, + { + "epoch": 1.0102367242482406, + "grad_norm": 1.2804633378982544, + "learning_rate": 2.3789378252521497e-05, + "loss": 0.4716, + "step": 395 + }, + { + "epoch": 1.0230326295585412, + "grad_norm": 1.4392125606536865, + "learning_rate": 2.3617116902830967e-05, + "loss": 0.4073, + "step": 400 + }, + { + "epoch": 1.035828534868842, + "grad_norm": 1.2945060729980469, + "learning_rate": 2.3443142289644987e-05, + "loss": 0.3601, + "step": 405 + }, + { + "epoch": 1.0486244401791427, + "grad_norm": 1.2614574432373047, + "learning_rate": 2.3267489002763977e-05, + "loss": 0.4504, + "step": 410 + }, + { + "epoch": 1.0614203454894433, + "grad_norm": 1.416227102279663, + "learning_rate": 2.309019196574389e-05, + "loss": 0.4151, + "step": 415 + }, + { + "epoch": 1.0742162507997441, + "grad_norm": 1.3260927200317383, + "learning_rate": 2.2911286428952657e-05, + "loss": 0.4316, + "step": 420 + }, + { + "epoch": 1.0870121561100448, + "grad_norm": 1.2823888063430786, + "learning_rate": 2.2730807962561697e-05, + "loss": 0.3918, + "step": 425 + }, + { + "epoch": 1.0998080614203456, + "grad_norm": 1.2820522785186768, + "learning_rate": 2.25487924494738e-05, + "loss": 0.3991, + "step": 430 + }, + { + "epoch": 1.1126039667306462, + "grad_norm": 1.2439035177230835, + "learning_rate": 2.2365276078188864e-05, + "loss": 0.3884, + "step": 435 + }, + { + "epoch": 1.1253998720409468, + "grad_norm": 1.4880032539367676, + "learning_rate": 2.218029533560887e-05, + "loss": 0.3468, + "step": 440 + }, + { + "epoch": 1.1381957773512477, + "grad_norm": 1.3113714456558228, + "learning_rate": 2.19938869997835e-05, + "loss": 0.3944, + "step": 445 + }, + { + "epoch": 1.1509916826615483, + "grad_norm": 1.4202535152435303, + "learning_rate": 2.1806088132597914e-05, + "loss": 0.3454, + "step": 450 + }, + { + "epoch": 1.1637875879718491, + "grad_norm": 1.3202991485595703, + "learning_rate": 2.161693607240405e-05, + "loss": 0.4008, + "step": 455 + }, + { + "epoch": 1.1765834932821497, + "grad_norm": 1.5019029378890991, + "learning_rate": 2.142646842659699e-05, + "loss": 0.3988, + "step": 460 + }, + { + "epoch": 1.1893793985924503, + "grad_norm": 1.2289100885391235, + "learning_rate": 2.1234723064137814e-05, + "loss": 0.371, + "step": 465 + }, + { + "epoch": 1.2021753039027512, + "grad_norm": 1.3377835750579834, + "learning_rate": 2.1041738108024463e-05, + "loss": 0.3346, + "step": 470 + }, + { + "epoch": 1.2149712092130518, + "grad_norm": 1.1726585626602173, + "learning_rate": 2.084755192771208e-05, + "loss": 0.328, + "step": 475 + }, + { + "epoch": 1.2277671145233526, + "grad_norm": 1.3820908069610596, + "learning_rate": 2.0652203131484365e-05, + "loss": 0.3285, + "step": 480 + }, + { + "epoch": 1.2405630198336532, + "grad_norm": 1.4775786399841309, + "learning_rate": 2.0455730558777427e-05, + "loss": 0.3127, + "step": 485 + }, + { + "epoch": 1.2533589251439539, + "grad_norm": 1.1928763389587402, + "learning_rate": 2.0258173272457724e-05, + "loss": 0.3403, + "step": 490 + }, + { + "epoch": 1.2661548304542547, + "grad_norm": 1.2866144180297852, + "learning_rate": 2.005957055105548e-05, + "loss": 0.3204, + "step": 495 + }, + { + "epoch": 1.2789507357645553, + "grad_norm": 1.3136380910873413, + "learning_rate": 1.9859961880955373e-05, + "loss": 0.367, + "step": 500 + }, + { + "epoch": 1.2917466410748562, + "grad_norm": 1.28550386428833, + "learning_rate": 1.965938694854575e-05, + "loss": 0.3167, + "step": 505 + }, + { + "epoch": 1.3045425463851568, + "grad_norm": 1.2028549909591675, + "learning_rate": 1.9457885632328155e-05, + "loss": 0.3245, + "step": 510 + }, + { + "epoch": 1.3173384516954574, + "grad_norm": 1.251611590385437, + "learning_rate": 1.9255497994988672e-05, + "loss": 0.2803, + "step": 515 + }, + { + "epoch": 1.3301343570057582, + "grad_norm": 1.367486596107483, + "learning_rate": 1.9052264275432602e-05, + "loss": 0.2815, + "step": 520 + }, + { + "epoch": 1.3429302623160588, + "grad_norm": 1.1959351301193237, + "learning_rate": 1.8848224880784106e-05, + "loss": 0.3005, + "step": 525 + }, + { + "epoch": 1.3557261676263597, + "grad_norm": 1.3052616119384766, + "learning_rate": 1.8643420378352484e-05, + "loss": 0.301, + "step": 530 + }, + { + "epoch": 1.3685220729366603, + "grad_norm": 1.3535047769546509, + "learning_rate": 1.843789148756647e-05, + "loss": 0.2856, + "step": 535 + }, + { + "epoch": 1.381317978246961, + "grad_norm": 1.1741418838500977, + "learning_rate": 1.8231679071878406e-05, + "loss": 0.2638, + "step": 540 + }, + { + "epoch": 1.3941138835572617, + "grad_norm": 1.2906776666641235, + "learning_rate": 1.8024824130639707e-05, + "loss": 0.2791, + "step": 545 + }, + { + "epoch": 1.4069097888675623, + "grad_norm": 1.221483588218689, + "learning_rate": 1.7817367790949344e-05, + "loss": 0.2556, + "step": 550 + }, + { + "epoch": 1.4197056941778632, + "grad_norm": 1.3152070045471191, + "learning_rate": 1.7609351299476898e-05, + "loss": 0.2987, + "step": 555 + }, + { + "epoch": 1.4325015994881638, + "grad_norm": 1.288856863975525, + "learning_rate": 1.740081601426188e-05, + "loss": 0.2877, + "step": 560 + }, + { + "epoch": 1.4452975047984644, + "grad_norm": 1.2574849128723145, + "learning_rate": 1.719180339649087e-05, + "loss": 0.2965, + "step": 565 + }, + { + "epoch": 1.4580934101087653, + "grad_norm": 1.4481838941574097, + "learning_rate": 1.698235500225416e-05, + "loss": 0.2708, + "step": 570 + }, + { + "epoch": 1.4708893154190659, + "grad_norm": 1.4110863208770752, + "learning_rate": 1.6772512474283548e-05, + "loss": 0.3208, + "step": 575 + }, + { + "epoch": 1.4836852207293667, + "grad_norm": 1.4938619136810303, + "learning_rate": 1.6562317533672877e-05, + "loss": 0.2415, + "step": 580 + }, + { + "epoch": 1.4964811260396673, + "grad_norm": 1.3125752210617065, + "learning_rate": 1.6351811971583008e-05, + "loss": 0.254, + "step": 585 + }, + { + "epoch": 1.509277031349968, + "grad_norm": 1.233290195465088, + "learning_rate": 1.6141037640932882e-05, + "loss": 0.2774, + "step": 590 + }, + { + "epoch": 1.5220729366602685, + "grad_norm": 1.450292706489563, + "learning_rate": 1.5930036448078234e-05, + "loss": 0.2026, + "step": 595 + }, + { + "epoch": 1.5348688419705694, + "grad_norm": 1.1884843111038208, + "learning_rate": 1.5718850344479778e-05, + "loss": 0.253, + "step": 600 + }, + { + "epoch": 1.5476647472808702, + "grad_norm": 1.2242238521575928, + "learning_rate": 1.5507521318362372e-05, + "loss": 0.262, + "step": 605 + }, + { + "epoch": 1.5604606525911708, + "grad_norm": 1.1839346885681152, + "learning_rate": 1.529609138636685e-05, + "loss": 0.2279, + "step": 610 + }, + { + "epoch": 1.5732565579014715, + "grad_norm": 1.2601990699768066, + "learning_rate": 1.5084602585196249e-05, + "loss": 0.2373, + "step": 615 + }, + { + "epoch": 1.586052463211772, + "grad_norm": 1.2605071067810059, + "learning_rate": 1.4873096963258052e-05, + "loss": 0.2302, + "step": 620 + }, + { + "epoch": 1.598848368522073, + "grad_norm": 1.3458341360092163, + "learning_rate": 1.4661616572304036e-05, + "loss": 0.2418, + "step": 625 + }, + { + "epoch": 1.6116442738323737, + "grad_norm": 1.331021785736084, + "learning_rate": 1.445020345906955e-05, + "loss": 0.217, + "step": 630 + }, + { + "epoch": 1.6244401791426744, + "grad_norm": 1.3336098194122314, + "learning_rate": 1.423889965691372e-05, + "loss": 0.2141, + "step": 635 + }, + { + "epoch": 1.637236084452975, + "grad_norm": 1.2281140089035034, + "learning_rate": 1.4027747177462318e-05, + "loss": 0.2115, + "step": 640 + }, + { + "epoch": 1.6500319897632756, + "grad_norm": 1.2025333642959595, + "learning_rate": 1.3816788002255019e-05, + "loss": 0.2303, + "step": 645 + }, + { + "epoch": 1.6628278950735764, + "grad_norm": 1.2597026824951172, + "learning_rate": 1.3606064074398544e-05, + "loss": 0.2242, + "step": 650 + }, + { + "epoch": 1.6756238003838773, + "grad_norm": 1.3140994310379028, + "learning_rate": 1.3395617290227505e-05, + "loss": 0.2544, + "step": 655 + }, + { + "epoch": 1.6884197056941779, + "grad_norm": 1.2038415670394897, + "learning_rate": 1.3185489490974556e-05, + "loss": 0.2117, + "step": 660 + }, + { + "epoch": 1.7012156110044785, + "grad_norm": 1.20416259765625, + "learning_rate": 1.2975722454451454e-05, + "loss": 0.2386, + "step": 665 + }, + { + "epoch": 1.714011516314779, + "grad_norm": 1.2511985301971436, + "learning_rate": 1.2766357886742744e-05, + "loss": 0.2347, + "step": 670 + }, + { + "epoch": 1.72680742162508, + "grad_norm": 1.3303972482681274, + "learning_rate": 1.2557437413913767e-05, + "loss": 0.246, + "step": 675 + }, + { + "epoch": 1.7396033269353808, + "grad_norm": 1.260467767715454, + "learning_rate": 1.2349002573734469e-05, + "loss": 0.1935, + "step": 680 + }, + { + "epoch": 1.7523992322456814, + "grad_norm": 1.3908368349075317, + "learning_rate": 1.214109480742084e-05, + "loss": 0.2321, + "step": 685 + }, + { + "epoch": 1.765195137555982, + "grad_norm": 1.1935721635818481, + "learning_rate": 1.1933755451395556e-05, + "loss": 0.1985, + "step": 690 + }, + { + "epoch": 1.7779910428662828, + "grad_norm": 1.3129582405090332, + "learning_rate": 1.17270257290694e-05, + "loss": 0.1935, + "step": 695 + }, + { + "epoch": 1.7907869481765835, + "grad_norm": 1.3419909477233887, + "learning_rate": 1.1520946742645184e-05, + "loss": 0.228, + "step": 700 + }, + { + "epoch": 1.8035828534868843, + "grad_norm": 1.1662694215774536, + "learning_rate": 1.13155594649458e-05, + "loss": 0.2229, + "step": 705 + }, + { + "epoch": 1.816378758797185, + "grad_norm": 1.4214714765548706, + "learning_rate": 1.111090473126793e-05, + "loss": 0.1927, + "step": 710 + }, + { + "epoch": 1.8291746641074855, + "grad_norm": 1.1187121868133545, + "learning_rate": 1.0907023231263158e-05, + "loss": 0.1933, + "step": 715 + }, + { + "epoch": 1.8419705694177864, + "grad_norm": 1.1233196258544922, + "learning_rate": 1.0703955500847993e-05, + "loss": 0.1714, + "step": 720 + }, + { + "epoch": 1.854766474728087, + "grad_norm": 1.315608263015747, + "learning_rate": 1.050174191414449e-05, + "loss": 0.161, + "step": 725 + }, + { + "epoch": 1.8675623800383878, + "grad_norm": 1.2789711952209473, + "learning_rate": 1.0300422675453038e-05, + "loss": 0.2073, + "step": 730 + }, + { + "epoch": 1.8803582853486884, + "grad_norm": 1.178246021270752, + "learning_rate": 1.0100037811258878e-05, + "loss": 0.1798, + "step": 735 + }, + { + "epoch": 1.893154190658989, + "grad_norm": 1.3638640642166138, + "learning_rate": 9.900627162274017e-06, + "loss": 0.187, + "step": 740 + }, + { + "epoch": 1.9059500959692899, + "grad_norm": 1.3298636674880981, + "learning_rate": 9.702230375516064e-06, + "loss": 0.1722, + "step": 745 + }, + { + "epoch": 1.9187460012795905, + "grad_norm": 1.4497771263122559, + "learning_rate": 9.504886896425545e-06, + "loss": 0.1689, + "step": 750 + }, + { + "epoch": 1.9315419065898913, + "grad_norm": 1.3587476015090942, + "learning_rate": 9.308635961023348e-06, + "loss": 0.1628, + "step": 755 + }, + { + "epoch": 1.944337811900192, + "grad_norm": 1.2313709259033203, + "learning_rate": 9.113516588109773e-06, + "loss": 0.1758, + "step": 760 + }, + { + "epoch": 1.9571337172104926, + "grad_norm": 1.2906852960586548, + "learning_rate": 8.919567571506777e-06, + "loss": 0.1901, + "step": 765 + }, + { + "epoch": 1.9699296225207934, + "grad_norm": 1.167916178703308, + "learning_rate": 8.72682747234493e-06, + "loss": 0.1702, + "step": 770 + }, + { + "epoch": 1.982725527831094, + "grad_norm": 1.0892744064331055, + "learning_rate": 8.53533461139669e-06, + "loss": 0.1478, + "step": 775 + }, + { + "epoch": 1.9955214331413949, + "grad_norm": 1.1081629991531372, + "learning_rate": 8.3451270614574e-06, + "loss": 0.1746, + "step": 780 + }, + { + "epoch": 2.0076775431861806, + "grad_norm": 1.0554344654083252, + "learning_rate": 8.15624263977563e-06, + "loss": 0.1309, + "step": 785 + }, + { + "epoch": 2.0204734484964813, + "grad_norm": 0.9948863983154297, + "learning_rate": 7.968718900534311e-06, + "loss": 0.1462, + "step": 790 + }, + { + "epoch": 2.033269353806782, + "grad_norm": 1.0924488306045532, + "learning_rate": 7.782593127384184e-06, + "loss": 0.1231, + "step": 795 + }, + { + "epoch": 2.0460652591170825, + "grad_norm": 0.9814079999923706, + "learning_rate": 7.597902326031018e-06, + "loss": 0.1329, + "step": 800 + }, + { + "epoch": 2.058861164427383, + "grad_norm": 1.5390774011611938, + "learning_rate": 7.4146832168781085e-06, + "loss": 0.1354, + "step": 805 + }, + { + "epoch": 2.071657069737684, + "grad_norm": 1.0324681997299194, + "learning_rate": 7.232972227725485e-06, + "loss": 0.1293, + "step": 810 + }, + { + "epoch": 2.0844529750479848, + "grad_norm": 1.0238826274871826, + "learning_rate": 7.052805486527307e-06, + "loss": 0.1077, + "step": 815 + }, + { + "epoch": 2.0972488803582854, + "grad_norm": 1.1938427686691284, + "learning_rate": 6.874218814208863e-06, + "loss": 0.1373, + "step": 820 + }, + { + "epoch": 2.110044785668586, + "grad_norm": 1.0253716707229614, + "learning_rate": 6.6972477175446255e-06, + "loss": 0.1207, + "step": 825 + }, + { + "epoch": 2.1228406909788866, + "grad_norm": 1.0161418914794922, + "learning_rate": 6.521927382098753e-06, + "loss": 0.1285, + "step": 830 + }, + { + "epoch": 2.1356365962891877, + "grad_norm": 1.1127641201019287, + "learning_rate": 6.3482926652294695e-06, + "loss": 0.1379, + "step": 835 + }, + { + "epoch": 2.1484325015994883, + "grad_norm": 0.9611137509346008, + "learning_rate": 6.176378089158686e-06, + "loss": 0.1339, + "step": 840 + }, + { + "epoch": 2.161228406909789, + "grad_norm": 1.1142990589141846, + "learning_rate": 6.006217834108261e-06, + "loss": 0.1146, + "step": 845 + }, + { + "epoch": 2.1740243122200895, + "grad_norm": 0.9452466368675232, + "learning_rate": 5.8378457315042576e-06, + "loss": 0.1063, + "step": 850 + }, + { + "epoch": 2.18682021753039, + "grad_norm": 1.2314553260803223, + "learning_rate": 5.671295257250537e-06, + "loss": 0.1158, + "step": 855 + }, + { + "epoch": 2.199616122840691, + "grad_norm": 1.0659081935882568, + "learning_rate": 5.506599525073064e-06, + "loss": 0.1159, + "step": 860 + }, + { + "epoch": 2.212412028150992, + "grad_norm": 1.1217008829116821, + "learning_rate": 5.343791279936189e-06, + "loss": 0.1406, + "step": 865 + }, + { + "epoch": 2.2252079334612924, + "grad_norm": 1.161470890045166, + "learning_rate": 5.182902891532267e-06, + "loss": 0.1385, + "step": 870 + }, + { + "epoch": 2.238003838771593, + "grad_norm": 1.1703786849975586, + "learning_rate": 5.023966347845892e-06, + "loss": 0.1243, + "step": 875 + }, + { + "epoch": 2.2507997440818936, + "grad_norm": 0.9385775327682495, + "learning_rate": 4.867013248794e-06, + "loss": 0.1042, + "step": 880 + }, + { + "epoch": 2.2635956493921947, + "grad_norm": 0.8704110980033875, + "learning_rate": 4.712074799943158e-06, + "loss": 0.0941, + "step": 885 + }, + { + "epoch": 2.2763915547024953, + "grad_norm": 1.1330705881118774, + "learning_rate": 4.5591818063052315e-06, + "loss": 0.1213, + "step": 890 + }, + { + "epoch": 2.289187460012796, + "grad_norm": 0.8906403183937073, + "learning_rate": 4.408364666212712e-06, + "loss": 0.1005, + "step": 895 + }, + { + "epoch": 2.3019833653230966, + "grad_norm": 1.0460264682769775, + "learning_rate": 4.2596533652748836e-06, + "loss": 0.1218, + "step": 900 + }, + { + "epoch": 2.314779270633397, + "grad_norm": 1.0769453048706055, + "learning_rate": 4.113077470416057e-06, + "loss": 0.1128, + "step": 905 + }, + { + "epoch": 2.3275751759436982, + "grad_norm": 0.8864208459854126, + "learning_rate": 3.9686661239970466e-06, + "loss": 0.1268, + "step": 910 + }, + { + "epoch": 2.340371081253999, + "grad_norm": 1.0040526390075684, + "learning_rate": 3.8264480380210686e-06, + "loss": 0.1096, + "step": 915 + }, + { + "epoch": 2.3531669865642995, + "grad_norm": 0.9466296434402466, + "learning_rate": 3.6864514884251648e-06, + "loss": 0.0907, + "step": 920 + }, + { + "epoch": 2.3659628918746, + "grad_norm": 0.9947707056999207, + "learning_rate": 3.5487043094583756e-06, + "loss": 0.1081, + "step": 925 + }, + { + "epoch": 2.3787587971849007, + "grad_norm": 1.2030586004257202, + "learning_rate": 3.413233888147715e-06, + "loss": 0.1022, + "step": 930 + }, + { + "epoch": 2.3915547024952017, + "grad_norm": 0.8738510012626648, + "learning_rate": 3.280067158853034e-06, + "loss": 0.1079, + "step": 935 + }, + { + "epoch": 2.4043506078055024, + "grad_norm": 1.1237248182296753, + "learning_rate": 3.149230597911907e-06, + "loss": 0.1208, + "step": 940 + }, + { + "epoch": 2.417146513115803, + "grad_norm": 0.8492381572723389, + "learning_rate": 3.020750218375605e-06, + "loss": 0.0911, + "step": 945 + }, + { + "epoch": 2.4299424184261036, + "grad_norm": 0.9028100371360779, + "learning_rate": 2.8946515648371303e-06, + "loss": 0.0944, + "step": 950 + }, + { + "epoch": 2.442738323736404, + "grad_norm": 0.8454201817512512, + "learning_rate": 2.770959708352418e-06, + "loss": 0.1035, + "step": 955 + }, + { + "epoch": 2.4555342290467053, + "grad_norm": 1.2540916204452515, + "learning_rate": 2.6496992414557053e-06, + "loss": 0.1108, + "step": 960 + }, + { + "epoch": 2.468330134357006, + "grad_norm": 0.947363018989563, + "learning_rate": 2.530894273270002e-06, + "loss": 0.1144, + "step": 965 + }, + { + "epoch": 2.4811260396673065, + "grad_norm": 0.8354603052139282, + "learning_rate": 2.4145684247136807e-06, + "loss": 0.1019, + "step": 970 + }, + { + "epoch": 2.493921944977607, + "grad_norm": 1.0092355012893677, + "learning_rate": 2.300744823804181e-06, + "loss": 0.1023, + "step": 975 + }, + { + "epoch": 2.5067178502879077, + "grad_norm": 0.7925326824188232, + "learning_rate": 2.1894461010596396e-06, + "loss": 0.0953, + "step": 980 + }, + { + "epoch": 2.519513755598209, + "grad_norm": 0.7938242554664612, + "learning_rate": 2.080694384999469e-06, + "loss": 0.094, + "step": 985 + }, + { + "epoch": 2.5323096609085094, + "grad_norm": 0.7974414229393005, + "learning_rate": 1.974511297744782e-06, + "loss": 0.0862, + "step": 990 + }, + { + "epoch": 2.54510556621881, + "grad_norm": 0.9374391436576843, + "learning_rate": 1.8709179507194158e-06, + "loss": 0.0959, + "step": 995 + }, + { + "epoch": 2.5579014715291106, + "grad_norm": 0.6966494917869568, + "learning_rate": 1.769934940452554e-06, + "loss": 0.1034, + "step": 1000 + }, + { + "epoch": 2.5706973768394112, + "grad_norm": 0.8880689740180969, + "learning_rate": 1.6715823444837241e-06, + "loss": 0.0889, + "step": 1005 + }, + { + "epoch": 2.5834932821497123, + "grad_norm": 0.9491341710090637, + "learning_rate": 1.5758797173709327e-06, + "loss": 0.1131, + "step": 1010 + }, + { + "epoch": 2.596289187460013, + "grad_norm": 0.7817289233207703, + "learning_rate": 1.4828460868028277e-06, + "loss": 0.0961, + "step": 1015 + }, + { + "epoch": 2.6090850927703135, + "grad_norm": 0.7002028226852417, + "learning_rate": 1.3924999498155832e-06, + "loss": 0.079, + "step": 1020 + }, + { + "epoch": 2.621880998080614, + "grad_norm": 0.6855006814002991, + "learning_rate": 1.3048592691153137e-06, + "loss": 0.1006, + "step": 1025 + }, + { + "epoch": 2.6346769033909148, + "grad_norm": 0.7571349740028381, + "learning_rate": 1.2199414695067001e-06, + "loss": 0.0804, + "step": 1030 + }, + { + "epoch": 2.647472808701216, + "grad_norm": 0.9670151472091675, + "learning_rate": 1.1377634344285826e-06, + "loss": 0.1012, + "step": 1035 + }, + { + "epoch": 2.6602687140115164, + "grad_norm": 0.8406096696853638, + "learning_rate": 1.0583415025971693e-06, + "loss": 0.0956, + "step": 1040 + }, + { + "epoch": 2.673064619321817, + "grad_norm": 0.6418116092681885, + "learning_rate": 9.816914647575653e-07, + "loss": 0.1053, + "step": 1045 + }, + { + "epoch": 2.6858605246321177, + "grad_norm": 0.87382972240448, + "learning_rate": 9.078285605442365e-07, + "loss": 0.1056, + "step": 1050 + }, + { + "epoch": 2.6986564299424183, + "grad_norm": 0.7899898290634155, + "learning_rate": 8.36767475451054e-07, + "loss": 0.099, + "step": 1055 + }, + { + "epoch": 2.7114523352527193, + "grad_norm": 0.9738756418228149, + "learning_rate": 7.685223379115075e-07, + "loss": 0.1119, + "step": 1060 + }, + { + "epoch": 2.72424824056302, + "grad_norm": 0.7966146469116211, + "learning_rate": 7.031067164896776e-07, + "loss": 0.0953, + "step": 1065 + }, + { + "epoch": 2.7370441458733206, + "grad_norm": 0.7761546969413757, + "learning_rate": 6.405336171825222e-07, + "loss": 0.0968, + "step": 1070 + }, + { + "epoch": 2.749840051183621, + "grad_norm": 0.7942113876342773, + "learning_rate": 5.808154808340077e-07, + "loss": 0.0842, + "step": 1075 + }, + { + "epoch": 2.762635956493922, + "grad_norm": 0.7131679058074951, + "learning_rate": 5.239641806616119e-07, + "loss": 0.0835, + "step": 1080 + }, + { + "epoch": 2.775431861804223, + "grad_norm": 0.7599950432777405, + "learning_rate": 4.6999101989568136e-07, + "loss": 0.0865, + "step": 1085 + }, + { + "epoch": 2.7882277671145235, + "grad_norm": 0.8484867811203003, + "learning_rate": 4.1890672953210475e-07, + "loss": 0.0823, + "step": 1090 + }, + { + "epoch": 2.801023672424824, + "grad_norm": 0.8103335499763489, + "learning_rate": 3.70721466198774e-07, + "loss": 0.098, + "step": 1095 + }, + { + "epoch": 2.8138195777351247, + "grad_norm": 0.904417872428894, + "learning_rate": 3.2544481013622673e-07, + "loss": 0.0937, + "step": 1100 + }, + { + "epoch": 2.8266154830454253, + "grad_norm": 0.7618983387947083, + "learning_rate": 2.8308576329290125e-07, + "loss": 0.0818, + "step": 1105 + }, + { + "epoch": 2.8394113883557264, + "grad_norm": 0.8357146382331848, + "learning_rate": 2.436527475353517e-07, + "loss": 0.093, + "step": 1110 + }, + { + "epoch": 2.852207293666027, + "grad_norm": 0.8578983545303345, + "learning_rate": 2.0715360297381746e-07, + "loss": 0.0942, + "step": 1115 + }, + { + "epoch": 2.8650031989763276, + "grad_norm": 0.9789866805076599, + "learning_rate": 1.735955864034233e-07, + "loss": 0.0901, + "step": 1120 + }, + { + "epoch": 2.877799104286628, + "grad_norm": 0.740338146686554, + "learning_rate": 1.4298536986139865e-07, + "loss": 0.0808, + "step": 1125 + }, + { + "epoch": 2.890595009596929, + "grad_norm": 0.7498023509979248, + "learning_rate": 1.1532903930053018e-07, + "loss": 0.1012, + "step": 1130 + }, + { + "epoch": 2.90339091490723, + "grad_norm": 0.6988301873207092, + "learning_rate": 9.063209337913492e-08, + "loss": 0.1053, + "step": 1135 + }, + { + "epoch": 2.9161868202175305, + "grad_norm": 0.7759808301925659, + "learning_rate": 6.889944236782631e-08, + "loss": 0.0898, + "step": 1140 + }, + { + "epoch": 2.928982725527831, + "grad_norm": 0.8789666891098022, + "learning_rate": 5.0135407173245163e-08, + "loss": 0.0895, + "step": 1145 + }, + { + "epoch": 2.9417786308381317, + "grad_norm": 0.876502275466919, + "learning_rate": 3.434371847897022e-08, + "loss": 0.1056, + "step": 1150 + }, + { + "epoch": 2.9545745361484324, + "grad_norm": 0.8613495826721191, + "learning_rate": 2.1527516003781443e-08, + "loss": 0.0931, + "step": 1155 + }, + { + "epoch": 2.9673704414587334, + "grad_norm": 0.6973171830177307, + "learning_rate": 1.1689347877419377e-08, + "loss": 0.1049, + "step": 1160 + }, + { + "epoch": 2.980166346769034, + "grad_norm": 0.7596643567085266, + "learning_rate": 4.831170133960394e-09, + "loss": 0.0891, + "step": 1165 + }, + { + "epoch": 2.9929622520793346, + "grad_norm": 0.7772971391677856, + "learning_rate": 9.543463229177984e-10, + "loss": 0.0872, + "step": 1170 + }, + { + "epoch": 3.0, + "step": 1173, + "total_flos": 1.4623446908954214e+18, + "train_loss": 0.4343231242635976, + "train_runtime": 1174.5204, + "train_samples_per_second": 31.928, + "train_steps_per_second": 0.999 + } + ], + "logging_steps": 5, + "max_steps": 1173, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4623446908954214e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b687d1ac959a168de7c2afaed85589a416c80d2a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/16_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e06453d8e1cc3ebacbea8773d07723114e65e6eb0e69cdaaa83c7bb35e4382 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..addf4e52a3836ff5af0a0cb7a91ec45c1ee1b7b2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 17_128_e3_3e-5 + results: [] +--- + + + +# 17_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a21778b76a6ca9ffb2ac69a56403a682f58622fa --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "down_proj", + "o_proj", + "v_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ace19d060110bec4f418e37b201c23f50c3d82d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a049529bec09bb2d4004890adcd1dd522798e50ba35b2b91c230c8d327c9c22 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..96312f6d64f882bb92dac2678cbfed39fb6579e8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.6798389543182008e+18, + "train_loss": 0.43523811194961426, + "train_runtime": 1333.6816, + "train_samples": 14138, + "train_samples_per_second": 31.802, + "train_steps_per_second": 0.994 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..96312f6d64f882bb92dac2678cbfed39fb6579e8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.6798389543182008e+18, + "train_loss": 0.43523811194961426, + "train_runtime": 1333.6816, + "train_samples": 14138, + "train_samples_per_second": 31.802, + "train_steps_per_second": 0.994 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3ab331b6f7d32f5e86b536df6de603d9a80e27b3 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1898 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1326, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011312217194570135, + "grad_norm": 0.7023484706878662, + "learning_rate": 1.791044776119403e-06, + "loss": 1.5985, + "step": 5 + }, + { + "epoch": 0.02262443438914027, + "grad_norm": 0.6193110942840576, + "learning_rate": 4.029850746268657e-06, + "loss": 1.5737, + "step": 10 + }, + { + "epoch": 0.033936651583710405, + "grad_norm": 0.5847377181053162, + "learning_rate": 6.268656716417911e-06, + "loss": 1.611, + "step": 15 + }, + { + "epoch": 0.04524886877828054, + "grad_norm": 0.5629210472106934, + "learning_rate": 8.507462686567164e-06, + "loss": 1.5678, + "step": 20 + }, + { + "epoch": 0.05656108597285068, + "grad_norm": 0.5497866272926331, + "learning_rate": 1.0746268656716418e-05, + "loss": 1.5899, + "step": 25 + }, + { + "epoch": 0.06787330316742081, + "grad_norm": 0.5367090106010437, + "learning_rate": 1.2985074626865672e-05, + "loss": 1.5732, + "step": 30 + }, + { + "epoch": 0.07918552036199095, + "grad_norm": 0.46884873509407043, + "learning_rate": 1.5223880597014927e-05, + "loss": 1.5401, + "step": 35 + }, + { + "epoch": 0.09049773755656108, + "grad_norm": 0.5197533369064331, + "learning_rate": 1.746268656716418e-05, + "loss": 1.5105, + "step": 40 + }, + { + "epoch": 0.10180995475113122, + "grad_norm": 0.611090362071991, + "learning_rate": 1.9701492537313435e-05, + "loss": 1.5983, + "step": 45 + }, + { + "epoch": 0.11312217194570136, + "grad_norm": 0.5282735824584961, + "learning_rate": 2.194029850746269e-05, + "loss": 1.4664, + "step": 50 + }, + { + "epoch": 0.1244343891402715, + "grad_norm": 0.5218468308448792, + "learning_rate": 2.417910447761194e-05, + "loss": 1.5058, + "step": 55 + }, + { + "epoch": 0.13574660633484162, + "grad_norm": 0.546008825302124, + "learning_rate": 2.6417910447761193e-05, + "loss": 1.4197, + "step": 60 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 0.6681496500968933, + "learning_rate": 2.8656716417910447e-05, + "loss": 1.4327, + "step": 65 + }, + { + "epoch": 0.1583710407239819, + "grad_norm": 0.6175218224525452, + "learning_rate": 2.9999813203541335e-05, + "loss": 1.4061, + "step": 70 + }, + { + "epoch": 0.16968325791855204, + "grad_norm": 0.6165537238121033, + "learning_rate": 2.9997711796810774e-05, + "loss": 1.3551, + "step": 75 + }, + { + "epoch": 0.18099547511312217, + "grad_norm": 0.5701508522033691, + "learning_rate": 2.9993275815975943e-05, + "loss": 1.3316, + "step": 80 + }, + { + "epoch": 0.19230769230769232, + "grad_norm": 0.8726045489311218, + "learning_rate": 2.9986505951550574e-05, + "loss": 1.3509, + "step": 85 + }, + { + "epoch": 0.20361990950226244, + "grad_norm": 0.6887842416763306, + "learning_rate": 2.9977403257345435e-05, + "loss": 1.3259, + "step": 90 + }, + { + "epoch": 0.2149321266968326, + "grad_norm": 0.7185653448104858, + "learning_rate": 2.996596915030429e-05, + "loss": 1.2595, + "step": 95 + }, + { + "epoch": 0.22624434389140272, + "grad_norm": 0.6435264348983765, + "learning_rate": 2.995220541028333e-05, + "loss": 1.3171, + "step": 100 + }, + { + "epoch": 0.23755656108597284, + "grad_norm": 0.6989210844039917, + "learning_rate": 2.9936114179774118e-05, + "loss": 1.2547, + "step": 105 + }, + { + "epoch": 0.248868778280543, + "grad_norm": 0.7270306348800659, + "learning_rate": 2.991769796357009e-05, + "loss": 1.2183, + "step": 110 + }, + { + "epoch": 0.26018099547511314, + "grad_norm": 0.7561452984809875, + "learning_rate": 2.9896959628376653e-05, + "loss": 1.2084, + "step": 115 + }, + { + "epoch": 0.27149321266968324, + "grad_norm": 0.7901420593261719, + "learning_rate": 2.987390240236494e-05, + "loss": 1.1851, + "step": 120 + }, + { + "epoch": 0.2828054298642534, + "grad_norm": 0.814742386341095, + "learning_rate": 2.984852987466931e-05, + "loss": 1.1715, + "step": 125 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8176323771476746, + "learning_rate": 2.982084599482867e-05, + "loss": 1.1617, + "step": 130 + }, + { + "epoch": 0.3054298642533937, + "grad_norm": 0.9063343405723572, + "learning_rate": 2.979085507217165e-05, + "loss": 1.1723, + "step": 135 + }, + { + "epoch": 0.3167420814479638, + "grad_norm": 0.8086380362510681, + "learning_rate": 2.9758561775145837e-05, + "loss": 1.1039, + "step": 140 + }, + { + "epoch": 0.32805429864253394, + "grad_norm": 0.8639967441558838, + "learning_rate": 2.9723971130591053e-05, + "loss": 1.0475, + "step": 145 + }, + { + "epoch": 0.3393665158371041, + "grad_norm": 0.8877090811729431, + "learning_rate": 2.9687088522956894e-05, + "loss": 1.1028, + "step": 150 + }, + { + "epoch": 0.3506787330316742, + "grad_norm": 0.8818389177322388, + "learning_rate": 2.9647919693464532e-05, + "loss": 1.0464, + "step": 155 + }, + { + "epoch": 0.36199095022624433, + "grad_norm": 0.905845046043396, + "learning_rate": 2.9606470739213066e-05, + "loss": 1.0938, + "step": 160 + }, + { + "epoch": 0.3733031674208145, + "grad_norm": 1.126086711883545, + "learning_rate": 2.956274811223042e-05, + "loss": 0.9619, + "step": 165 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 0.8744023442268372, + "learning_rate": 2.9516758618468994e-05, + "loss": 1.0419, + "step": 170 + }, + { + "epoch": 0.39592760180995473, + "grad_norm": 1.0396664142608643, + "learning_rate": 2.9468509416746267e-05, + "loss": 0.9662, + "step": 175 + }, + { + "epoch": 0.4072398190045249, + "grad_norm": 0.949018657207489, + "learning_rate": 2.9418008017630402e-05, + "loss": 0.9318, + "step": 180 + }, + { + "epoch": 0.41855203619909503, + "grad_norm": 1.1318162679672241, + "learning_rate": 2.9365262282271173e-05, + "loss": 0.978, + "step": 185 + }, + { + "epoch": 0.4298642533936652, + "grad_norm": 1.1023359298706055, + "learning_rate": 2.9310280421176255e-05, + "loss": 0.9916, + "step": 190 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 1.3056201934814453, + "learning_rate": 2.925307099293318e-05, + "loss": 0.9159, + "step": 195 + }, + { + "epoch": 0.45248868778280543, + "grad_norm": 1.0770295858383179, + "learning_rate": 2.9193642902877077e-05, + "loss": 0.9058, + "step": 200 + }, + { + "epoch": 0.4638009049773756, + "grad_norm": 1.0106678009033203, + "learning_rate": 2.9132005401704442e-05, + "loss": 0.9319, + "step": 205 + }, + { + "epoch": 0.4751131221719457, + "grad_norm": 0.9532214403152466, + "learning_rate": 2.906816808403319e-05, + "loss": 0.9813, + "step": 210 + }, + { + "epoch": 0.48642533936651583, + "grad_norm": 1.1295033693313599, + "learning_rate": 2.9002140886909087e-05, + "loss": 0.8235, + "step": 215 + }, + { + "epoch": 0.497737556561086, + "grad_norm": 1.1219044923782349, + "learning_rate": 2.893393408825898e-05, + "loss": 0.8016, + "step": 220 + }, + { + "epoch": 0.5090497737556561, + "grad_norm": 1.3662282228469849, + "learning_rate": 2.886355830529088e-05, + "loss": 0.8108, + "step": 225 + }, + { + "epoch": 0.5203619909502263, + "grad_norm": 1.1711231470108032, + "learning_rate": 2.8791024492841274e-05, + "loss": 0.8329, + "step": 230 + }, + { + "epoch": 0.5316742081447964, + "grad_norm": 1.1930088996887207, + "learning_rate": 2.8716343941669888e-05, + "loss": 0.8134, + "step": 235 + }, + { + "epoch": 0.5429864253393665, + "grad_norm": 1.138051152229309, + "learning_rate": 2.863952827670212e-05, + "loss": 0.8623, + "step": 240 + }, + { + "epoch": 0.5542986425339367, + "grad_norm": 1.1121857166290283, + "learning_rate": 2.8560589455219503e-05, + "loss": 0.7287, + "step": 245 + }, + { + "epoch": 0.5656108597285068, + "grad_norm": 1.149695634841919, + "learning_rate": 2.8479539764998393e-05, + "loss": 0.818, + "step": 250 + }, + { + "epoch": 0.5769230769230769, + "grad_norm": 1.187175989151001, + "learning_rate": 2.8396391822397238e-05, + "loss": 0.807, + "step": 255 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1272649765014648, + "learning_rate": 2.8311158570392694e-05, + "loss": 0.7338, + "step": 260 + }, + { + "epoch": 0.5995475113122172, + "grad_norm": 1.1579320430755615, + "learning_rate": 2.822385327656488e-05, + "loss": 0.7448, + "step": 265 + }, + { + "epoch": 0.6108597285067874, + "grad_norm": 1.1548452377319336, + "learning_rate": 2.8134489531032144e-05, + "loss": 0.7287, + "step": 270 + }, + { + "epoch": 0.6221719457013575, + "grad_norm": 1.2171094417572021, + "learning_rate": 2.804308124433557e-05, + "loss": 0.758, + "step": 275 + }, + { + "epoch": 0.6334841628959276, + "grad_norm": 1.2222726345062256, + "learning_rate": 2.794964264527365e-05, + "loss": 0.6635, + "step": 280 + }, + { + "epoch": 0.6447963800904978, + "grad_norm": 1.2666122913360596, + "learning_rate": 2.78541882786874e-05, + "loss": 0.7338, + "step": 285 + }, + { + "epoch": 0.6561085972850679, + "grad_norm": 1.1256541013717651, + "learning_rate": 2.7756733003196287e-05, + "loss": 0.7314, + "step": 290 + }, + { + "epoch": 0.667420814479638, + "grad_norm": 1.173448920249939, + "learning_rate": 2.765729198888529e-05, + "loss": 0.7048, + "step": 295 + }, + { + "epoch": 0.6787330316742082, + "grad_norm": 1.2395395040512085, + "learning_rate": 2.7555880714943506e-05, + "loss": 0.7007, + "step": 300 + }, + { + "epoch": 0.6900452488687783, + "grad_norm": 1.2983155250549316, + "learning_rate": 2.745251496725462e-05, + "loss": 0.6821, + "step": 305 + }, + { + "epoch": 0.7013574660633484, + "grad_norm": 1.2760140895843506, + "learning_rate": 2.7347210835939657e-05, + "loss": 0.6522, + "step": 310 + }, + { + "epoch": 0.7126696832579186, + "grad_norm": 1.21170973777771, + "learning_rate": 2.7239984712852344e-05, + "loss": 0.6698, + "step": 315 + }, + { + "epoch": 0.7239819004524887, + "grad_norm": 1.181296706199646, + "learning_rate": 2.7130853289027526e-05, + "loss": 0.6847, + "step": 320 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.3183116912841797, + "learning_rate": 2.7019833552083016e-05, + "loss": 0.6262, + "step": 325 + }, + { + "epoch": 0.746606334841629, + "grad_norm": 1.1561964750289917, + "learning_rate": 2.6906942783575258e-05, + "loss": 0.6226, + "step": 330 + }, + { + "epoch": 0.7579185520361991, + "grad_norm": 1.266079306602478, + "learning_rate": 2.679219855630925e-05, + "loss": 0.6138, + "step": 335 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.2360799312591553, + "learning_rate": 2.6675618731603107e-05, + "loss": 0.5965, + "step": 340 + }, + { + "epoch": 0.7805429864253394, + "grad_norm": 1.3071038722991943, + "learning_rate": 2.6557221456507775e-05, + "loss": 0.5884, + "step": 345 + }, + { + "epoch": 0.7918552036199095, + "grad_norm": 1.2023221254348755, + "learning_rate": 2.643702516098218e-05, + "loss": 0.6346, + "step": 350 + }, + { + "epoch": 0.8031674208144797, + "grad_norm": 1.2283083200454712, + "learning_rate": 2.6315048555024396e-05, + "loss": 0.5819, + "step": 355 + }, + { + "epoch": 0.8144796380090498, + "grad_norm": 1.37956964969635, + "learning_rate": 2.6191310625759232e-05, + "loss": 0.6037, + "step": 360 + }, + { + "epoch": 0.8257918552036199, + "grad_norm": 1.3345836400985718, + "learning_rate": 2.6065830634482625e-05, + "loss": 0.591, + "step": 365 + }, + { + "epoch": 0.8371040723981901, + "grad_norm": 1.3050849437713623, + "learning_rate": 2.5938628113663415e-05, + "loss": 0.5404, + "step": 370 + }, + { + "epoch": 0.8484162895927602, + "grad_norm": 1.451611042022705, + "learning_rate": 2.5809722863902857e-05, + "loss": 0.5581, + "step": 375 + }, + { + "epoch": 0.8597285067873304, + "grad_norm": 1.1759064197540283, + "learning_rate": 2.567913495085244e-05, + "loss": 0.4953, + "step": 380 + }, + { + "epoch": 0.8710407239819005, + "grad_norm": 1.442272663116455, + "learning_rate": 2.554688470209041e-05, + "loss": 0.5804, + "step": 385 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.2875120639801025, + "learning_rate": 2.5412992703957556e-05, + "loss": 0.5185, + "step": 390 + }, + { + "epoch": 0.8936651583710408, + "grad_norm": 1.4541233777999878, + "learning_rate": 2.5277479798352682e-05, + "loss": 0.4639, + "step": 395 + }, + { + "epoch": 0.9049773755656109, + "grad_norm": 1.167067289352417, + "learning_rate": 2.514036707948833e-05, + "loss": 0.4845, + "step": 400 + }, + { + "epoch": 0.916289592760181, + "grad_norm": 1.3148740530014038, + "learning_rate": 2.5001675890607195e-05, + "loss": 0.4959, + "step": 405 + }, + { + "epoch": 0.9276018099547512, + "grad_norm": 1.2642793655395508, + "learning_rate": 2.4861427820659813e-05, + "loss": 0.577, + "step": 410 + }, + { + "epoch": 0.9389140271493213, + "grad_norm": 1.3073261976242065, + "learning_rate": 2.471964470094396e-05, + "loss": 0.4662, + "step": 415 + }, + { + "epoch": 0.9502262443438914, + "grad_norm": 1.2139924764633179, + "learning_rate": 2.4576348601706366e-05, + "loss": 0.5449, + "step": 420 + }, + { + "epoch": 0.9615384615384616, + "grad_norm": 1.358360767364502, + "learning_rate": 2.4431561828707208e-05, + "loss": 0.5047, + "step": 425 + }, + { + "epoch": 0.9728506787330317, + "grad_norm": 1.4146969318389893, + "learning_rate": 2.428530691974795e-05, + "loss": 0.4876, + "step": 430 + }, + { + "epoch": 0.9841628959276018, + "grad_norm": 1.2291405200958252, + "learning_rate": 2.4137606641163064e-05, + "loss": 0.5216, + "step": 435 + }, + { + "epoch": 0.995475113122172, + "grad_norm": 1.4155473709106445, + "learning_rate": 2.3988483984276174e-05, + "loss": 0.4555, + "step": 440 + }, + { + "epoch": 1.006787330316742, + "grad_norm": 1.2198760509490967, + "learning_rate": 2.3837962161821183e-05, + "loss": 0.4846, + "step": 445 + }, + { + "epoch": 1.0180995475113122, + "grad_norm": 1.5185407400131226, + "learning_rate": 2.368606460432894e-05, + "loss": 0.3724, + "step": 450 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 1.397998571395874, + "learning_rate": 2.353281495647998e-05, + "loss": 0.4504, + "step": 455 + }, + { + "epoch": 1.0407239819004526, + "grad_norm": 1.3978341817855835, + "learning_rate": 2.3378237073423957e-05, + "loss": 0.4316, + "step": 460 + }, + { + "epoch": 1.0520361990950227, + "grad_norm": 1.186639666557312, + "learning_rate": 2.322235501706629e-05, + "loss": 0.3953, + "step": 465 + }, + { + "epoch": 1.0633484162895928, + "grad_norm": 1.5447667837142944, + "learning_rate": 2.3065193052322667e-05, + "loss": 0.3788, + "step": 470 + }, + { + "epoch": 1.0746606334841629, + "grad_norm": 1.2790387868881226, + "learning_rate": 2.2906775643341883e-05, + "loss": 0.4103, + "step": 475 + }, + { + "epoch": 1.085972850678733, + "grad_norm": 1.7914854288101196, + "learning_rate": 2.274712744969772e-05, + "loss": 0.4215, + "step": 480 + }, + { + "epoch": 1.0972850678733033, + "grad_norm": 1.3040345907211304, + "learning_rate": 2.2586273322550404e-05, + "loss": 0.3868, + "step": 485 + }, + { + "epoch": 1.1085972850678734, + "grad_norm": 1.2467855215072632, + "learning_rate": 2.2424238300778176e-05, + "loss": 0.3679, + "step": 490 + }, + { + "epoch": 1.1199095022624435, + "grad_norm": 1.176059603691101, + "learning_rate": 2.226104760707974e-05, + "loss": 0.3996, + "step": 495 + }, + { + "epoch": 1.1312217194570136, + "grad_norm": 1.4030050039291382, + "learning_rate": 2.2096726644048016e-05, + "loss": 0.3598, + "step": 500 + }, + { + "epoch": 1.1425339366515836, + "grad_norm": 1.3774036169052124, + "learning_rate": 2.1931300990215943e-05, + "loss": 0.347, + "step": 505 + }, + { + "epoch": 1.1538461538461537, + "grad_norm": 1.3611479997634888, + "learning_rate": 2.176479639607485e-05, + "loss": 0.3849, + "step": 510 + }, + { + "epoch": 1.165158371040724, + "grad_norm": 1.3484176397323608, + "learning_rate": 2.159723878006609e-05, + "loss": 0.3344, + "step": 515 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 1.3295278549194336, + "learning_rate": 2.142865422454654e-05, + "loss": 0.3968, + "step": 520 + }, + { + "epoch": 1.1877828054298643, + "grad_norm": 1.3752135038375854, + "learning_rate": 2.1259068971728547e-05, + "loss": 0.3451, + "step": 525 + }, + { + "epoch": 1.1990950226244343, + "grad_norm": 1.2674609422683716, + "learning_rate": 2.1088509419595007e-05, + "loss": 0.3315, + "step": 530 + }, + { + "epoch": 1.2104072398190044, + "grad_norm": 1.4603493213653564, + "learning_rate": 2.0917002117790247e-05, + "loss": 0.3593, + "step": 535 + }, + { + "epoch": 1.2217194570135748, + "grad_norm": 1.179242491722107, + "learning_rate": 2.0744573763487195e-05, + "loss": 0.3399, + "step": 540 + }, + { + "epoch": 1.2330316742081449, + "grad_norm": 1.2412261962890625, + "learning_rate": 2.057125119723168e-05, + "loss": 0.348, + "step": 545 + }, + { + "epoch": 1.244343891402715, + "grad_norm": 1.3290064334869385, + "learning_rate": 2.0397061398764367e-05, + "loss": 0.3206, + "step": 550 + }, + { + "epoch": 1.255656108597285, + "grad_norm": 1.2654874324798584, + "learning_rate": 2.0222031482821033e-05, + "loss": 0.3177, + "step": 555 + }, + { + "epoch": 1.2669683257918551, + "grad_norm": 1.232564926147461, + "learning_rate": 2.004618869491186e-05, + "loss": 0.3052, + "step": 560 + }, + { + "epoch": 1.2782805429864252, + "grad_norm": 1.3415547609329224, + "learning_rate": 1.9869560407080295e-05, + "loss": 0.2904, + "step": 565 + }, + { + "epoch": 1.2895927601809956, + "grad_norm": 1.477321982383728, + "learning_rate": 1.9692174113642307e-05, + "loss": 0.3367, + "step": 570 + }, + { + "epoch": 1.3009049773755657, + "grad_norm": 1.3388049602508545, + "learning_rate": 1.9514057426906536e-05, + "loss": 0.3112, + "step": 575 + }, + { + "epoch": 1.3122171945701357, + "grad_norm": 1.4969874620437622, + "learning_rate": 1.933523807287612e-05, + "loss": 0.3072, + "step": 580 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 1.2286587953567505, + "learning_rate": 1.9155743886932825e-05, + "loss": 0.321, + "step": 585 + }, + { + "epoch": 1.334841628959276, + "grad_norm": 1.4938592910766602, + "learning_rate": 1.8975602809504086e-05, + "loss": 0.3214, + "step": 590 + }, + { + "epoch": 1.3461538461538463, + "grad_norm": 1.1678742170333862, + "learning_rate": 1.8794842881713793e-05, + "loss": 0.3038, + "step": 595 + }, + { + "epoch": 1.3574660633484164, + "grad_norm": 1.345304250717163, + "learning_rate": 1.861349224101733e-05, + "loss": 0.2927, + "step": 600 + }, + { + "epoch": 1.3687782805429864, + "grad_norm": 1.3947763442993164, + "learning_rate": 1.8431579116821643e-05, + "loss": 0.2827, + "step": 605 + }, + { + "epoch": 1.3800904977375565, + "grad_norm": 1.4334254264831543, + "learning_rate": 1.824913182609099e-05, + "loss": 0.2813, + "step": 610 + }, + { + "epoch": 1.3914027149321266, + "grad_norm": 1.3010629415512085, + "learning_rate": 1.806617876893907e-05, + "loss": 0.3018, + "step": 615 + }, + { + "epoch": 1.4027149321266967, + "grad_norm": 1.2858314514160156, + "learning_rate": 1.7882748424208227e-05, + "loss": 0.2738, + "step": 620 + }, + { + "epoch": 1.4140271493212668, + "grad_norm": 1.2971038818359375, + "learning_rate": 1.7698869345036323e-05, + "loss": 0.2808, + "step": 625 + }, + { + "epoch": 1.4253393665158371, + "grad_norm": 1.2110191583633423, + "learning_rate": 1.7514570154412146e-05, + "loss": 0.2876, + "step": 630 + }, + { + "epoch": 1.4366515837104072, + "grad_norm": 1.6380356550216675, + "learning_rate": 1.7329879540719878e-05, + "loss": 0.2831, + "step": 635 + }, + { + "epoch": 1.4479638009049773, + "grad_norm": 1.2307326793670654, + "learning_rate": 1.7144826253273405e-05, + "loss": 0.3137, + "step": 640 + }, + { + "epoch": 1.4592760180995474, + "grad_norm": 1.292812466621399, + "learning_rate": 1.6959439097841134e-05, + "loss": 0.2657, + "step": 645 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.166748046875, + "learning_rate": 1.6773746932162063e-05, + "loss": 0.2608, + "step": 650 + }, + { + "epoch": 1.4819004524886878, + "grad_norm": 1.1987972259521484, + "learning_rate": 1.6587778661453674e-05, + "loss": 0.2921, + "step": 655 + }, + { + "epoch": 1.493212669683258, + "grad_norm": 1.259569525718689, + "learning_rate": 1.6401563233912527e-05, + "loss": 0.2291, + "step": 660 + }, + { + "epoch": 1.504524886877828, + "grad_norm": 1.3507957458496094, + "learning_rate": 1.6215129636208106e-05, + "loss": 0.2719, + "step": 665 + }, + { + "epoch": 1.5158371040723981, + "grad_norm": 1.2996076345443726, + "learning_rate": 1.6028506888970708e-05, + "loss": 0.2818, + "step": 670 + }, + { + "epoch": 1.5271493212669682, + "grad_norm": 1.3597135543823242, + "learning_rate": 1.584172404227404e-05, + "loss": 0.2349, + "step": 675 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 1.3006099462509155, + "learning_rate": 1.5654810171113197e-05, + "loss": 0.2587, + "step": 680 + }, + { + "epoch": 1.5497737556561086, + "grad_norm": 1.2175633907318115, + "learning_rate": 1.546779437087881e-05, + "loss": 0.2298, + "step": 685 + }, + { + "epoch": 1.5610859728506787, + "grad_norm": 1.364903450012207, + "learning_rate": 1.5280705752828e-05, + "loss": 0.2302, + "step": 690 + }, + { + "epoch": 1.5723981900452488, + "grad_norm": 1.2236790657043457, + "learning_rate": 1.5093573439552856e-05, + "loss": 0.2169, + "step": 695 + }, + { + "epoch": 1.5837104072398192, + "grad_norm": 1.2389367818832397, + "learning_rate": 1.4906426560447147e-05, + "loss": 0.2666, + "step": 700 + }, + { + "epoch": 1.5950226244343892, + "grad_norm": 1.2264719009399414, + "learning_rate": 1.4719294247172007e-05, + "loss": 0.2389, + "step": 705 + }, + { + "epoch": 1.6063348416289593, + "grad_norm": 1.2285138368606567, + "learning_rate": 1.4532205629121196e-05, + "loss": 0.2436, + "step": 710 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 1.444098949432373, + "learning_rate": 1.4345189828886806e-05, + "loss": 0.2383, + "step": 715 + }, + { + "epoch": 1.6289592760180995, + "grad_norm": 1.2639408111572266, + "learning_rate": 1.4158275957725964e-05, + "loss": 0.2289, + "step": 720 + }, + { + "epoch": 1.6402714932126696, + "grad_norm": 1.2073606252670288, + "learning_rate": 1.3971493111029293e-05, + "loss": 0.2345, + "step": 725 + }, + { + "epoch": 1.6515837104072397, + "grad_norm": 1.3921147584915161, + "learning_rate": 1.3784870363791903e-05, + "loss": 0.2442, + "step": 730 + }, + { + "epoch": 1.6628959276018098, + "grad_norm": 1.17991042137146, + "learning_rate": 1.3598436766087479e-05, + "loss": 0.1917, + "step": 735 + }, + { + "epoch": 1.6742081447963801, + "grad_norm": 1.3274710178375244, + "learning_rate": 1.341222133854633e-05, + "loss": 0.2107, + "step": 740 + }, + { + "epoch": 1.6855203619909502, + "grad_norm": 1.2456737756729126, + "learning_rate": 1.322625306783794e-05, + "loss": 0.2399, + "step": 745 + }, + { + "epoch": 1.6968325791855203, + "grad_norm": 1.2316844463348389, + "learning_rate": 1.3040560902158862e-05, + "loss": 0.1865, + "step": 750 + }, + { + "epoch": 1.7081447963800906, + "grad_norm": 1.397672176361084, + "learning_rate": 1.2855173746726602e-05, + "loss": 0.2029, + "step": 755 + }, + { + "epoch": 1.7194570135746607, + "grad_norm": 1.3828132152557373, + "learning_rate": 1.2670120459280128e-05, + "loss": 0.2163, + "step": 760 + }, + { + "epoch": 1.7307692307692308, + "grad_norm": 1.3692728281021118, + "learning_rate": 1.2485429845587862e-05, + "loss": 0.2296, + "step": 765 + }, + { + "epoch": 1.742081447963801, + "grad_norm": 1.0443191528320312, + "learning_rate": 1.230113065496368e-05, + "loss": 0.1998, + "step": 770 + }, + { + "epoch": 1.753393665158371, + "grad_norm": 1.5142840147018433, + "learning_rate": 1.2117251575791775e-05, + "loss": 0.223, + "step": 775 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 1.3820034265518188, + "learning_rate": 1.1933821231060932e-05, + "loss": 0.2115, + "step": 780 + }, + { + "epoch": 1.7760180995475112, + "grad_norm": 1.2880628108978271, + "learning_rate": 1.1750868173909014e-05, + "loss": 0.2051, + "step": 785 + }, + { + "epoch": 1.7873303167420813, + "grad_norm": 1.446811556816101, + "learning_rate": 1.1568420883178363e-05, + "loss": 0.1681, + "step": 790 + }, + { + "epoch": 1.7986425339366516, + "grad_norm": 1.300570011138916, + "learning_rate": 1.1386507758982672e-05, + "loss": 0.2205, + "step": 795 + }, + { + "epoch": 1.8099547511312217, + "grad_norm": 1.2325433492660522, + "learning_rate": 1.1205157118286203e-05, + "loss": 0.2212, + "step": 800 + }, + { + "epoch": 1.8212669683257918, + "grad_norm": 1.2984925508499146, + "learning_rate": 1.1024397190495915e-05, + "loss": 0.2152, + "step": 805 + }, + { + "epoch": 1.8325791855203621, + "grad_norm": 1.2968674898147583, + "learning_rate": 1.0844256113067177e-05, + "loss": 0.183, + "step": 810 + }, + { + "epoch": 1.8438914027149322, + "grad_norm": 1.2306768894195557, + "learning_rate": 1.0664761927123882e-05, + "loss": 0.1705, + "step": 815 + }, + { + "epoch": 1.8552036199095023, + "grad_norm": 1.1228686571121216, + "learning_rate": 1.0485942573093468e-05, + "loss": 0.1793, + "step": 820 + }, + { + "epoch": 1.8665158371040724, + "grad_norm": 1.1465085744857788, + "learning_rate": 1.0307825886357697e-05, + "loss": 0.1675, + "step": 825 + }, + { + "epoch": 1.8778280542986425, + "grad_norm": 1.1275529861450195, + "learning_rate": 1.0130439592919706e-05, + "loss": 0.1631, + "step": 830 + }, + { + "epoch": 1.8891402714932126, + "grad_norm": 1.2407021522521973, + "learning_rate": 9.953811305088142e-06, + "loss": 0.1722, + "step": 835 + }, + { + "epoch": 1.9004524886877827, + "grad_norm": 1.0112310647964478, + "learning_rate": 9.777968517178967e-06, + "loss": 0.1519, + "step": 840 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 1.1441476345062256, + "learning_rate": 9.60293860123564e-06, + "loss": 0.1626, + "step": 845 + }, + { + "epoch": 1.9230769230769231, + "grad_norm": 0.9927852749824524, + "learning_rate": 9.428748802768328e-06, + "loss": 0.1717, + "step": 850 + }, + { + "epoch": 1.9343891402714932, + "grad_norm": 1.130632758140564, + "learning_rate": 9.25542623651281e-06, + "loss": 0.1783, + "step": 855 + }, + { + "epoch": 1.9457013574660633, + "grad_norm": 1.5049558877944946, + "learning_rate": 9.082997882209754e-06, + "loss": 0.1756, + "step": 860 + }, + { + "epoch": 1.9570135746606336, + "grad_norm": 1.4394689798355103, + "learning_rate": 8.911490580404996e-06, + "loss": 0.1638, + "step": 865 + }, + { + "epoch": 1.9683257918552037, + "grad_norm": 1.2555463314056396, + "learning_rate": 8.740931028271462e-06, + "loss": 0.1533, + "step": 870 + }, + { + "epoch": 1.9796380090497738, + "grad_norm": 1.2319729328155518, + "learning_rate": 8.571345775453468e-06, + "loss": 0.1593, + "step": 875 + }, + { + "epoch": 1.990950226244344, + "grad_norm": 1.3108621835708618, + "learning_rate": 8.402761219933911e-06, + "loss": 0.1389, + "step": 880 + }, + { + "epoch": 2.002262443438914, + "grad_norm": 1.3924185037612915, + "learning_rate": 8.23520360392515e-06, + "loss": 0.1681, + "step": 885 + }, + { + "epoch": 2.013574660633484, + "grad_norm": 1.3026796579360962, + "learning_rate": 8.068699009784057e-06, + "loss": 0.1406, + "step": 890 + }, + { + "epoch": 2.024886877828054, + "grad_norm": 1.0794360637664795, + "learning_rate": 7.90327335595198e-06, + "loss": 0.1372, + "step": 895 + }, + { + "epoch": 2.0361990950226243, + "grad_norm": 0.9982720613479614, + "learning_rate": 7.738952392920262e-06, + "loss": 0.1339, + "step": 900 + }, + { + "epoch": 2.0475113122171944, + "grad_norm": 1.294332504272461, + "learning_rate": 7.575761699221828e-06, + "loss": 0.1281, + "step": 905 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.9899606108665466, + "learning_rate": 7.413726677449603e-06, + "loss": 0.1224, + "step": 910 + }, + { + "epoch": 2.070135746606335, + "grad_norm": 1.0753167867660522, + "learning_rate": 7.252872550302278e-06, + "loss": 0.1238, + "step": 915 + }, + { + "epoch": 2.081447963800905, + "grad_norm": 1.1845439672470093, + "learning_rate": 7.093224356658117e-06, + "loss": 0.1227, + "step": 920 + }, + { + "epoch": 2.0927601809954752, + "grad_norm": 1.1412107944488525, + "learning_rate": 6.934806947677335e-06, + "loss": 0.1128, + "step": 925 + }, + { + "epoch": 2.1040723981900453, + "grad_norm": 0.9579062461853027, + "learning_rate": 6.7776449829337065e-06, + "loss": 0.1304, + "step": 930 + }, + { + "epoch": 2.1153846153846154, + "grad_norm": 0.9280579090118408, + "learning_rate": 6.621762926576046e-06, + "loss": 0.1188, + "step": 935 + }, + { + "epoch": 2.1266968325791855, + "grad_norm": 0.9483746886253357, + "learning_rate": 6.467185043520024e-06, + "loss": 0.125, + "step": 940 + }, + { + "epoch": 2.1380090497737556, + "grad_norm": 1.100545883178711, + "learning_rate": 6.313935395671061e-06, + "loss": 0.1139, + "step": 945 + }, + { + "epoch": 2.1493212669683257, + "grad_norm": 1.2844512462615967, + "learning_rate": 6.162037838178821e-06, + "loss": 0.1313, + "step": 950 + }, + { + "epoch": 2.160633484162896, + "grad_norm": 0.9510179758071899, + "learning_rate": 6.01151601572383e-06, + "loss": 0.107, + "step": 955 + }, + { + "epoch": 2.171945701357466, + "grad_norm": 0.9466827511787415, + "learning_rate": 5.86239335883694e-06, + "loss": 0.1108, + "step": 960 + }, + { + "epoch": 2.183257918552036, + "grad_norm": 0.9869335293769836, + "learning_rate": 5.71469308025205e-06, + "loss": 0.1212, + "step": 965 + }, + { + "epoch": 2.1945701357466065, + "grad_norm": 1.3074133396148682, + "learning_rate": 5.568438171292794e-06, + "loss": 0.1092, + "step": 970 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 1.076827049255371, + "learning_rate": 5.4236513982936396e-06, + "loss": 0.121, + "step": 975 + }, + { + "epoch": 2.2171945701357467, + "grad_norm": 0.8891086578369141, + "learning_rate": 5.280355299056043e-06, + "loss": 0.1078, + "step": 980 + }, + { + "epoch": 2.228506787330317, + "grad_norm": 1.1237887144088745, + "learning_rate": 5.138572179340193e-06, + "loss": 0.0988, + "step": 985 + }, + { + "epoch": 2.239819004524887, + "grad_norm": 1.126628041267395, + "learning_rate": 4.998324109392807e-06, + "loss": 0.1263, + "step": 990 + }, + { + "epoch": 2.251131221719457, + "grad_norm": 1.0269757509231567, + "learning_rate": 4.859632920511675e-06, + "loss": 0.1057, + "step": 995 + }, + { + "epoch": 2.262443438914027, + "grad_norm": 0.9898586869239807, + "learning_rate": 4.7225202016473195e-06, + "loss": 0.1151, + "step": 1000 + }, + { + "epoch": 2.273755656108597, + "grad_norm": 0.9611592292785645, + "learning_rate": 4.587007296042448e-06, + "loss": 0.1099, + "step": 1005 + }, + { + "epoch": 2.2850678733031673, + "grad_norm": 1.150863528251648, + "learning_rate": 4.453115297909595e-06, + "loss": 0.1007, + "step": 1010 + }, + { + "epoch": 2.2963800904977374, + "grad_norm": 1.0080831050872803, + "learning_rate": 4.320865049147563e-06, + "loss": 0.1141, + "step": 1015 + }, + { + "epoch": 2.3076923076923075, + "grad_norm": 0.9897997975349426, + "learning_rate": 4.190277136097146e-06, + "loss": 0.0913, + "step": 1020 + }, + { + "epoch": 2.3190045248868776, + "grad_norm": 0.9703898429870605, + "learning_rate": 4.061371886336584e-06, + "loss": 0.1031, + "step": 1025 + }, + { + "epoch": 2.330316742081448, + "grad_norm": 1.0006749629974365, + "learning_rate": 3.93416936551737e-06, + "loss": 0.1186, + "step": 1030 + }, + { + "epoch": 2.341628959276018, + "grad_norm": 0.9988330006599426, + "learning_rate": 3.808689374240769e-06, + "loss": 0.1182, + "step": 1035 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.918912410736084, + "learning_rate": 3.684951444975608e-06, + "loss": 0.1007, + "step": 1040 + }, + { + "epoch": 2.3642533936651584, + "grad_norm": 0.7413221597671509, + "learning_rate": 3.5629748390178295e-06, + "loss": 0.1085, + "step": 1045 + }, + { + "epoch": 2.3755656108597285, + "grad_norm": 1.2523767948150635, + "learning_rate": 3.442778543492227e-06, + "loss": 0.0973, + "step": 1050 + }, + { + "epoch": 2.3868778280542986, + "grad_norm": 0.8844832181930542, + "learning_rate": 3.324381268396896e-06, + "loss": 0.104, + "step": 1055 + }, + { + "epoch": 2.3981900452488687, + "grad_norm": 1.2548707723617554, + "learning_rate": 3.2078014436907556e-06, + "loss": 0.1179, + "step": 1060 + }, + { + "epoch": 2.409502262443439, + "grad_norm": 0.7335833311080933, + "learning_rate": 3.0930572164247408e-06, + "loss": 0.0936, + "step": 1065 + }, + { + "epoch": 2.420814479638009, + "grad_norm": 1.0428881645202637, + "learning_rate": 2.9801664479169845e-06, + "loss": 0.0873, + "step": 1070 + }, + { + "epoch": 2.4321266968325794, + "grad_norm": 0.8381737470626831, + "learning_rate": 2.8691467109724777e-06, + "loss": 0.0876, + "step": 1075 + }, + { + "epoch": 2.4434389140271495, + "grad_norm": 0.8668937683105469, + "learning_rate": 2.760015287147662e-06, + "loss": 0.1016, + "step": 1080 + }, + { + "epoch": 2.4547511312217196, + "grad_norm": 1.0120211839675903, + "learning_rate": 2.652789164060346e-06, + "loss": 0.1169, + "step": 1085 + }, + { + "epoch": 2.4660633484162897, + "grad_norm": 0.9578264951705933, + "learning_rate": 2.5474850327453785e-06, + "loss": 0.1261, + "step": 1090 + }, + { + "epoch": 2.47737556561086, + "grad_norm": 0.7538727521896362, + "learning_rate": 2.4441192850564962e-06, + "loss": 0.1034, + "step": 1095 + }, + { + "epoch": 2.48868778280543, + "grad_norm": 0.7748970985412598, + "learning_rate": 2.342708011114708e-06, + "loss": 0.0965, + "step": 1100 + }, + { + "epoch": 2.5, + "grad_norm": 0.9045417904853821, + "learning_rate": 2.243266996803712e-06, + "loss": 0.0896, + "step": 1105 + }, + { + "epoch": 2.51131221719457, + "grad_norm": 1.048703670501709, + "learning_rate": 2.1458117213126012e-06, + "loss": 0.0887, + "step": 1110 + }, + { + "epoch": 2.52262443438914, + "grad_norm": 0.9097819924354553, + "learning_rate": 2.0503573547263528e-06, + "loss": 0.0933, + "step": 1115 + }, + { + "epoch": 2.5339366515837103, + "grad_norm": 1.0908763408660889, + "learning_rate": 1.9569187556644336e-06, + "loss": 0.101, + "step": 1120 + }, + { + "epoch": 2.5452488687782804, + "grad_norm": 0.830896258354187, + "learning_rate": 1.8655104689678555e-06, + "loss": 0.0889, + "step": 1125 + }, + { + "epoch": 2.5565610859728505, + "grad_norm": 0.8424963355064392, + "learning_rate": 1.7761467234351191e-06, + "loss": 0.1053, + "step": 1130 + }, + { + "epoch": 2.5678733031674206, + "grad_norm": 0.7258997559547424, + "learning_rate": 1.6888414296073058e-06, + "loss": 0.0943, + "step": 1135 + }, + { + "epoch": 2.579185520361991, + "grad_norm": 0.7496826648712158, + "learning_rate": 1.6036081776027623e-06, + "loss": 0.0888, + "step": 1140 + }, + { + "epoch": 2.590497737556561, + "grad_norm": 0.8899266123771667, + "learning_rate": 1.52046023500161e-06, + "loss": 0.0853, + "step": 1145 + }, + { + "epoch": 2.6018099547511313, + "grad_norm": 0.7951101660728455, + "learning_rate": 1.4394105447804994e-06, + "loss": 0.0815, + "step": 1150 + }, + { + "epoch": 2.6131221719457014, + "grad_norm": 0.7465418577194214, + "learning_rate": 1.360471723297882e-06, + "loss": 0.088, + "step": 1155 + }, + { + "epoch": 2.6244343891402715, + "grad_norm": 0.6331043243408203, + "learning_rate": 1.2836560583301139e-06, + "loss": 0.0787, + "step": 1160 + }, + { + "epoch": 2.6357466063348416, + "grad_norm": 1.2532057762145996, + "learning_rate": 1.20897550715873e-06, + "loss": 0.1067, + "step": 1165 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 1.0746504068374634, + "learning_rate": 1.1364416947091244e-06, + "loss": 0.1044, + "step": 1170 + }, + { + "epoch": 2.658371040723982, + "grad_norm": 0.7175267934799194, + "learning_rate": 1.066065911741021e-06, + "loss": 0.0933, + "step": 1175 + }, + { + "epoch": 2.669683257918552, + "grad_norm": 0.666314423084259, + "learning_rate": 9.978591130909142e-07, + "loss": 0.0923, + "step": 1180 + }, + { + "epoch": 2.6809954751131224, + "grad_norm": 0.7984594106674194, + "learning_rate": 9.318319159668137e-07, + "loss": 0.0802, + "step": 1185 + }, + { + "epoch": 2.6923076923076925, + "grad_norm": 0.7327749133110046, + "learning_rate": 8.679945982955589e-07, + "loss": 0.0898, + "step": 1190 + }, + { + "epoch": 2.7036199095022626, + "grad_norm": 0.7253024578094482, + "learning_rate": 8.063570971229245e-07, + "loss": 0.0748, + "step": 1195 + }, + { + "epoch": 2.7149321266968327, + "grad_norm": 1.0236749649047852, + "learning_rate": 7.469290070668189e-07, + "loss": 0.0953, + "step": 1200 + }, + { + "epoch": 2.726244343891403, + "grad_norm": 0.7124127745628357, + "learning_rate": 6.897195788237442e-07, + "loss": 0.0966, + "step": 1205 + }, + { + "epoch": 2.737556561085973, + "grad_norm": 0.6178568601608276, + "learning_rate": 6.347377177288283e-07, + "loss": 0.0883, + "step": 1210 + }, + { + "epoch": 2.748868778280543, + "grad_norm": 0.6644824147224426, + "learning_rate": 5.819919823695996e-07, + "loss": 0.0876, + "step": 1215 + }, + { + "epoch": 2.760180995475113, + "grad_norm": 0.7911688089370728, + "learning_rate": 5.31490583253737e-07, + "loss": 0.0917, + "step": 1220 + }, + { + "epoch": 2.771493212669683, + "grad_norm": 0.87834233045578, + "learning_rate": 4.832413815310083e-07, + "loss": 0.1041, + "step": 1225 + }, + { + "epoch": 2.7828054298642533, + "grad_norm": 0.7534009218215942, + "learning_rate": 4.3725188776958247e-07, + "loss": 0.0946, + "step": 1230 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.8493807911872864, + "learning_rate": 3.935292607869334e-07, + "loss": 0.1094, + "step": 1235 + }, + { + "epoch": 2.8054298642533935, + "grad_norm": 0.6921862363815308, + "learning_rate": 3.520803065354694e-07, + "loss": 0.0861, + "step": 1240 + }, + { + "epoch": 2.8167420814479636, + "grad_norm": 0.7049110531806946, + "learning_rate": 3.129114770431074e-07, + "loss": 0.0876, + "step": 1245 + }, + { + "epoch": 2.8280542986425337, + "grad_norm": 0.6821387410163879, + "learning_rate": 2.7602886940894633e-07, + "loss": 0.0971, + "step": 1250 + }, + { + "epoch": 2.839366515837104, + "grad_norm": 0.7714629173278809, + "learning_rate": 2.41438224854168e-07, + "loss": 0.0902, + "step": 1255 + }, + { + "epoch": 2.8506787330316743, + "grad_norm": 0.8271575570106506, + "learning_rate": 2.0914492782835194e-07, + "loss": 0.0996, + "step": 1260 + }, + { + "epoch": 2.8619909502262444, + "grad_norm": 0.8146201968193054, + "learning_rate": 1.791540051713325e-07, + "loss": 0.0797, + "step": 1265 + }, + { + "epoch": 2.8733031674208145, + "grad_norm": 0.712128758430481, + "learning_rate": 1.514701253306866e-07, + "loss": 0.0787, + "step": 1270 + }, + { + "epoch": 2.8846153846153846, + "grad_norm": 0.6784965395927429, + "learning_rate": 1.260975976350598e-07, + "loss": 0.0965, + "step": 1275 + }, + { + "epoch": 2.8959276018099547, + "grad_norm": 0.7491218447685242, + "learning_rate": 1.0304037162334467e-07, + "loss": 0.0949, + "step": 1280 + }, + { + "epoch": 2.9072398190045248, + "grad_norm": 0.6661131381988525, + "learning_rate": 8.23020364299093e-08, + "loss": 0.0854, + "step": 1285 + }, + { + "epoch": 2.918552036199095, + "grad_norm": 0.8049013018608093, + "learning_rate": 6.388582022588241e-08, + "loss": 0.0996, + "step": 1290 + }, + { + "epoch": 2.9298642533936654, + "grad_norm": 0.7447080612182617, + "learning_rate": 4.779458971667205e-08, + "loss": 0.0973, + "step": 1295 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.7686961889266968, + "learning_rate": 3.4030849695710905e-08, + "loss": 0.0903, + "step": 1300 + }, + { + "epoch": 2.9524886877828056, + "grad_norm": 0.7263092398643494, + "learning_rate": 2.2596742654564795e-08, + "loss": 0.0952, + "step": 1305 + }, + { + "epoch": 2.9638009049773757, + "grad_norm": 0.8017034530639648, + "learning_rate": 1.3494048449426145e-08, + "loss": 0.0898, + "step": 1310 + }, + { + "epoch": 2.975113122171946, + "grad_norm": 0.7985490560531616, + "learning_rate": 6.724184024057279e-09, + "loss": 0.0921, + "step": 1315 + }, + { + "epoch": 2.986425339366516, + "grad_norm": 0.836917519569397, + "learning_rate": 2.28820318922518e-09, + "loss": 0.1036, + "step": 1320 + }, + { + "epoch": 2.997737556561086, + "grad_norm": 1.0875036716461182, + "learning_rate": 1.8679645866437335e-10, + "loss": 0.0911, + "step": 1325 + }, + { + "epoch": 3.0, + "step": 1326, + "total_flos": 1.6798389543182008e+18, + "train_loss": 0.43523811194961426, + "train_runtime": 1333.6816, + "train_samples_per_second": 31.802, + "train_steps_per_second": 0.994 + } + ], + "logging_steps": 5, + "max_steps": 1326, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6798389543182008e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..142cf96526c1d88cf5d6f80bb859a7ee76e3a62c --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/17_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c2aaeafacb4de4cc0a64e3d74cc20528791589e3a34d8d8fbefaddaf6c837c5 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4e30ec064835f091f8914595a3ad387b39f56830 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 18_128_e3_3e-5 + results: [] +--- + + + +# 18_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c5c8ac217b0f8678bf3e4b0ddd9a97dec15fc06 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "up_proj", + "down_proj", + "v_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b9fdeb8357006f803b6654c344a449c9225730f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:496377dd3459360447161c14f43b26b49d492200c4f025f4ebd48075a3277579 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..70907d1cd21d1a2a6cc1a6a743a5dc865c8e6ccf --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.0513325630265754e+18, + "train_loss": 0.41264643938574075, + "train_runtime": 851.0131, + "train_samples": 9341, + "train_samples_per_second": 32.929, + "train_steps_per_second": 1.029 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..70907d1cd21d1a2a6cc1a6a743a5dc865c8e6ccf --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.0513325630265754e+18, + "train_loss": 0.41264643938574075, + "train_runtime": 851.0131, + "train_samples": 9341, + "train_samples_per_second": 32.929, + "train_steps_per_second": 1.029 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e299806c6b41062292e0cb829262e804de1e142c --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1268 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 876, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.017123287671232876, + "grad_norm": 0.7707855701446533, + "learning_rate": 2.7272727272727272e-06, + "loss": 1.5395, + "step": 5 + }, + { + "epoch": 0.03424657534246575, + "grad_norm": 0.6173825263977051, + "learning_rate": 6.136363636363637e-06, + "loss": 1.6205, + "step": 10 + }, + { + "epoch": 0.05136986301369863, + "grad_norm": 0.6316550970077515, + "learning_rate": 9.545454545454545e-06, + "loss": 1.5462, + "step": 15 + }, + { + "epoch": 0.0684931506849315, + "grad_norm": 0.6844937801361084, + "learning_rate": 1.2954545454545455e-05, + "loss": 1.4775, + "step": 20 + }, + { + "epoch": 0.08561643835616438, + "grad_norm": 0.4585249423980713, + "learning_rate": 1.6363636363636363e-05, + "loss": 1.4725, + "step": 25 + }, + { + "epoch": 0.10273972602739725, + "grad_norm": 0.5080065727233887, + "learning_rate": 1.9772727272727274e-05, + "loss": 1.3989, + "step": 30 + }, + { + "epoch": 0.11986301369863013, + "grad_norm": 0.5693079829216003, + "learning_rate": 2.318181818181818e-05, + "loss": 1.468, + "step": 35 + }, + { + "epoch": 0.136986301369863, + "grad_norm": 0.5279492139816284, + "learning_rate": 2.6590909090909093e-05, + "loss": 1.3988, + "step": 40 + }, + { + "epoch": 0.1541095890410959, + "grad_norm": 0.580997884273529, + "learning_rate": 3e-05, + "loss": 1.417, + "step": 45 + }, + { + "epoch": 0.17123287671232876, + "grad_norm": 0.6076634526252747, + "learning_rate": 2.999732673837156e-05, + "loss": 1.2467, + "step": 50 + }, + { + "epoch": 0.18835616438356165, + "grad_norm": 0.5819867253303528, + "learning_rate": 2.9989307906329936e-05, + "loss": 1.2721, + "step": 55 + }, + { + "epoch": 0.2054794520547945, + "grad_norm": 0.6385595202445984, + "learning_rate": 2.9975946362066596e-05, + "loss": 1.2909, + "step": 60 + }, + { + "epoch": 0.2226027397260274, + "grad_norm": 0.6716596484184265, + "learning_rate": 2.995724686810202e-05, + "loss": 1.2768, + "step": 65 + }, + { + "epoch": 0.23972602739726026, + "grad_norm": 0.6922211050987244, + "learning_rate": 2.9933216089588158e-05, + "loss": 1.2084, + "step": 70 + }, + { + "epoch": 0.2568493150684932, + "grad_norm": 0.7001160383224487, + "learning_rate": 2.9903862591932762e-05, + "loss": 1.2146, + "step": 75 + }, + { + "epoch": 0.273972602739726, + "grad_norm": 0.7466607689857483, + "learning_rate": 2.986919683774636e-05, + "loss": 1.1976, + "step": 80 + }, + { + "epoch": 0.2910958904109589, + "grad_norm": 0.7107101082801819, + "learning_rate": 2.9829231183113013e-05, + "loss": 1.124, + "step": 85 + }, + { + "epoch": 0.3082191780821918, + "grad_norm": 0.7726143598556519, + "learning_rate": 2.9783979873186188e-05, + "loss": 1.0958, + "step": 90 + }, + { + "epoch": 0.3253424657534247, + "grad_norm": 0.6908758282661438, + "learning_rate": 2.973345903711128e-05, + "loss": 1.1075, + "step": 95 + }, + { + "epoch": 0.3424657534246575, + "grad_norm": 0.8406662344932556, + "learning_rate": 2.9677686682276623e-05, + "loss": 1.0675, + "step": 100 + }, + { + "epoch": 0.3595890410958904, + "grad_norm": 0.8028907179832458, + "learning_rate": 2.9616682687895038e-05, + "loss": 1.037, + "step": 105 + }, + { + "epoch": 0.3767123287671233, + "grad_norm": 0.8559976816177368, + "learning_rate": 2.9550468797918162e-05, + "loss": 0.9528, + "step": 110 + }, + { + "epoch": 0.3938356164383562, + "grad_norm": 0.862770676612854, + "learning_rate": 2.947906861328618e-05, + "loss": 0.9355, + "step": 115 + }, + { + "epoch": 0.410958904109589, + "grad_norm": 1.9408719539642334, + "learning_rate": 2.9402507583515604e-05, + "loss": 0.9328, + "step": 120 + }, + { + "epoch": 0.4280821917808219, + "grad_norm": 0.9231855273246765, + "learning_rate": 2.9320812997628184e-05, + "loss": 0.9016, + "step": 125 + }, + { + "epoch": 0.4452054794520548, + "grad_norm": 0.899286687374115, + "learning_rate": 2.923401397442415e-05, + "loss": 0.8709, + "step": 130 + }, + { + "epoch": 0.4623287671232877, + "grad_norm": 0.8978399634361267, + "learning_rate": 2.914214145210324e-05, + "loss": 0.8916, + "step": 135 + }, + { + "epoch": 0.4794520547945205, + "grad_norm": 1.0212494134902954, + "learning_rate": 2.9045228177237285e-05, + "loss": 0.8209, + "step": 140 + }, + { + "epoch": 0.4965753424657534, + "grad_norm": 1.0693618059158325, + "learning_rate": 2.894330869309814e-05, + "loss": 0.8554, + "step": 145 + }, + { + "epoch": 0.5136986301369864, + "grad_norm": 1.0181245803833008, + "learning_rate": 2.8836419327345297e-05, + "loss": 0.8066, + "step": 150 + }, + { + "epoch": 0.5308219178082192, + "grad_norm": 0.9633310437202454, + "learning_rate": 2.8724598179077413e-05, + "loss": 0.8387, + "step": 155 + }, + { + "epoch": 0.547945205479452, + "grad_norm": 1.162981629371643, + "learning_rate": 2.8607885105252473e-05, + "loss": 0.7537, + "step": 160 + }, + { + "epoch": 0.565068493150685, + "grad_norm": 1.1320053339004517, + "learning_rate": 2.848632170648139e-05, + "loss": 0.7319, + "step": 165 + }, + { + "epoch": 0.5821917808219178, + "grad_norm": 1.1333472728729248, + "learning_rate": 2.8359951312200077e-05, + "loss": 0.7242, + "step": 170 + }, + { + "epoch": 0.5993150684931506, + "grad_norm": 1.107226848602295, + "learning_rate": 2.8228818965225325e-05, + "loss": 0.7392, + "step": 175 + }, + { + "epoch": 0.6164383561643836, + "grad_norm": 1.5738768577575684, + "learning_rate": 2.8092971405700004e-05, + "loss": 0.713, + "step": 180 + }, + { + "epoch": 0.6335616438356164, + "grad_norm": 1.1799507141113281, + "learning_rate": 2.7952457054433193e-05, + "loss": 0.6953, + "step": 185 + }, + { + "epoch": 0.6506849315068494, + "grad_norm": 1.2730201482772827, + "learning_rate": 2.780732599564137e-05, + "loss": 0.6927, + "step": 190 + }, + { + "epoch": 0.6678082191780822, + "grad_norm": 1.2045273780822754, + "learning_rate": 2.76576299590966e-05, + "loss": 0.6563, + "step": 195 + }, + { + "epoch": 0.684931506849315, + "grad_norm": 1.3648529052734375, + "learning_rate": 2.7503422301688276e-05, + "loss": 0.662, + "step": 200 + }, + { + "epoch": 0.702054794520548, + "grad_norm": 1.0436056852340698, + "learning_rate": 2.7344757988404845e-05, + "loss": 0.6146, + "step": 205 + }, + { + "epoch": 0.7191780821917808, + "grad_norm": 1.258631944656372, + "learning_rate": 2.718169357274238e-05, + "loss": 0.5944, + "step": 210 + }, + { + "epoch": 0.7363013698630136, + "grad_norm": 1.2473983764648438, + "learning_rate": 2.7014287176546922e-05, + "loss": 0.5881, + "step": 215 + }, + { + "epoch": 0.7534246575342466, + "grad_norm": 1.2907663583755493, + "learning_rate": 2.6842598469297846e-05, + "loss": 0.6322, + "step": 220 + }, + { + "epoch": 0.7705479452054794, + "grad_norm": 1.211248755455017, + "learning_rate": 2.6666688646839574e-05, + "loss": 0.5986, + "step": 225 + }, + { + "epoch": 0.7876712328767124, + "grad_norm": 1.2639895677566528, + "learning_rate": 2.6486620409569222e-05, + "loss": 0.5579, + "step": 230 + }, + { + "epoch": 0.8047945205479452, + "grad_norm": 1.2268179655075073, + "learning_rate": 2.6302457940088024e-05, + "loss": 0.5665, + "step": 235 + }, + { + "epoch": 0.821917808219178, + "grad_norm": 1.3948252201080322, + "learning_rate": 2.611426688032439e-05, + "loss": 0.5628, + "step": 240 + }, + { + "epoch": 0.839041095890411, + "grad_norm": 1.3128544092178345, + "learning_rate": 2.5922114308136826e-05, + "loss": 0.5951, + "step": 245 + }, + { + "epoch": 0.8561643835616438, + "grad_norm": 1.3927007913589478, + "learning_rate": 2.5726068713405084e-05, + "loss": 0.4919, + "step": 250 + }, + { + "epoch": 0.8732876712328768, + "grad_norm": 1.4141489267349243, + "learning_rate": 2.5526199973617932e-05, + "loss": 0.5043, + "step": 255 + }, + { + "epoch": 0.8904109589041096, + "grad_norm": 1.1954162120819092, + "learning_rate": 2.532257932896641e-05, + "loss": 0.5208, + "step": 260 + }, + { + "epoch": 0.9075342465753424, + "grad_norm": 1.1128246784210205, + "learning_rate": 2.511527935695133e-05, + "loss": 0.4863, + "step": 265 + }, + { + "epoch": 0.9246575342465754, + "grad_norm": 1.142656683921814, + "learning_rate": 2.4904373946514136e-05, + "loss": 0.4901, + "step": 270 + }, + { + "epoch": 0.9417808219178082, + "grad_norm": 1.1256080865859985, + "learning_rate": 2.468993827170028e-05, + "loss": 0.4418, + "step": 275 + }, + { + "epoch": 0.958904109589041, + "grad_norm": 1.3199682235717773, + "learning_rate": 2.4472048764864602e-05, + "loss": 0.4486, + "step": 280 + }, + { + "epoch": 0.976027397260274, + "grad_norm": 1.3065838813781738, + "learning_rate": 2.425078308942815e-05, + "loss": 0.4808, + "step": 285 + }, + { + "epoch": 0.9931506849315068, + "grad_norm": 1.1987745761871338, + "learning_rate": 2.402622011219622e-05, + "loss": 0.4632, + "step": 290 + }, + { + "epoch": 1.0102739726027397, + "grad_norm": 1.3093665838241577, + "learning_rate": 2.379843987524753e-05, + "loss": 0.3556, + "step": 295 + }, + { + "epoch": 1.0273972602739727, + "grad_norm": 1.3352142572402954, + "learning_rate": 2.3567523567404346e-05, + "loss": 0.4045, + "step": 300 + }, + { + "epoch": 1.0445205479452055, + "grad_norm": 1.5840680599212646, + "learning_rate": 2.3333553495294033e-05, + "loss": 0.4221, + "step": 305 + }, + { + "epoch": 1.0616438356164384, + "grad_norm": 1.292328953742981, + "learning_rate": 2.309661305401205e-05, + "loss": 0.4175, + "step": 310 + }, + { + "epoch": 1.0787671232876712, + "grad_norm": 1.4324355125427246, + "learning_rate": 2.285678669739705e-05, + "loss": 0.402, + "step": 315 + }, + { + "epoch": 1.095890410958904, + "grad_norm": 1.3458586931228638, + "learning_rate": 2.2614159907928588e-05, + "loss": 0.3429, + "step": 320 + }, + { + "epoch": 1.1130136986301369, + "grad_norm": 1.3387055397033691, + "learning_rate": 2.236881916625816e-05, + "loss": 0.3536, + "step": 325 + }, + { + "epoch": 1.13013698630137, + "grad_norm": 1.3068883419036865, + "learning_rate": 2.212085192038453e-05, + "loss": 0.3419, + "step": 330 + }, + { + "epoch": 1.1472602739726028, + "grad_norm": 1.3170369863510132, + "learning_rate": 2.1870346554484154e-05, + "loss": 0.33, + "step": 335 + }, + { + "epoch": 1.1643835616438356, + "grad_norm": 1.2477595806121826, + "learning_rate": 2.161739235740802e-05, + "loss": 0.3569, + "step": 340 + }, + { + "epoch": 1.1815068493150684, + "grad_norm": 1.627367615699768, + "learning_rate": 2.1362079490855968e-05, + "loss": 0.2973, + "step": 345 + }, + { + "epoch": 1.1986301369863013, + "grad_norm": 1.4333659410476685, + "learning_rate": 2.110449895723991e-05, + "loss": 0.3815, + "step": 350 + }, + { + "epoch": 1.2157534246575343, + "grad_norm": 1.3490846157073975, + "learning_rate": 2.084474256724743e-05, + "loss": 0.354, + "step": 355 + }, + { + "epoch": 1.2328767123287672, + "grad_norm": 1.2283445596694946, + "learning_rate": 2.0582902907117193e-05, + "loss": 0.3021, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 1.2130153179168701, + "learning_rate": 2.0319073305638035e-05, + "loss": 0.2795, + "step": 365 + }, + { + "epoch": 1.2671232876712328, + "grad_norm": 1.2800546884536743, + "learning_rate": 2.00533478008833e-05, + "loss": 0.3108, + "step": 370 + }, + { + "epoch": 1.2842465753424657, + "grad_norm": 1.3222861289978027, + "learning_rate": 1.97858211066924e-05, + "loss": 0.2757, + "step": 375 + }, + { + "epoch": 1.3013698630136985, + "grad_norm": 1.1488317251205444, + "learning_rate": 1.9516588578911484e-05, + "loss": 0.2626, + "step": 380 + }, + { + "epoch": 1.3184931506849316, + "grad_norm": 1.2023591995239258, + "learning_rate": 1.9245746181405306e-05, + "loss": 0.2631, + "step": 385 + }, + { + "epoch": 1.3356164383561644, + "grad_norm": 1.1607204675674438, + "learning_rate": 1.8973390451852348e-05, + "loss": 0.2966, + "step": 390 + }, + { + "epoch": 1.3527397260273972, + "grad_norm": 1.2505004405975342, + "learning_rate": 1.8699618467335428e-05, + "loss": 0.2675, + "step": 395 + }, + { + "epoch": 1.36986301369863, + "grad_norm": 1.2396783828735352, + "learning_rate": 1.8424527809740028e-05, + "loss": 0.3042, + "step": 400 + }, + { + "epoch": 1.3869863013698631, + "grad_norm": 1.2249491214752197, + "learning_rate": 1.8148216530972714e-05, + "loss": 0.286, + "step": 405 + }, + { + "epoch": 1.404109589041096, + "grad_norm": 1.328526258468628, + "learning_rate": 1.7870783118012034e-05, + "loss": 0.2623, + "step": 410 + }, + { + "epoch": 1.4212328767123288, + "grad_norm": 1.2126291990280151, + "learning_rate": 1.7592326457804295e-05, + "loss": 0.239, + "step": 415 + }, + { + "epoch": 1.4383561643835616, + "grad_norm": 1.3541003465652466, + "learning_rate": 1.7312945802016817e-05, + "loss": 0.2292, + "step": 420 + }, + { + "epoch": 1.4554794520547945, + "grad_norm": 1.259499192237854, + "learning_rate": 1.7032740731661178e-05, + "loss": 0.2199, + "step": 425 + }, + { + "epoch": 1.4726027397260273, + "grad_norm": 1.2383365631103516, + "learning_rate": 1.675181112159907e-05, + "loss": 0.2676, + "step": 430 + }, + { + "epoch": 1.4897260273972603, + "grad_norm": 1.2413582801818848, + "learning_rate": 1.6470257104943414e-05, + "loss": 0.2443, + "step": 435 + }, + { + "epoch": 1.5068493150684932, + "grad_norm": 1.3570055961608887, + "learning_rate": 1.618817903736741e-05, + "loss": 0.239, + "step": 440 + }, + { + "epoch": 1.523972602739726, + "grad_norm": 1.2950910329818726, + "learning_rate": 1.5905677461334292e-05, + "loss": 0.2341, + "step": 445 + }, + { + "epoch": 1.541095890410959, + "grad_norm": 1.3160887956619263, + "learning_rate": 1.5622853070260492e-05, + "loss": 0.2299, + "step": 450 + }, + { + "epoch": 1.558219178082192, + "grad_norm": 1.1965223550796509, + "learning_rate": 1.5339806672624982e-05, + "loss": 0.2076, + "step": 455 + }, + { + "epoch": 1.5753424657534247, + "grad_norm": 1.605102300643921, + "learning_rate": 1.5056639156037597e-05, + "loss": 0.218, + "step": 460 + }, + { + "epoch": 1.5924657534246576, + "grad_norm": 1.1930174827575684, + "learning_rate": 1.4773451451279213e-05, + "loss": 0.2372, + "step": 465 + }, + { + "epoch": 1.6095890410958904, + "grad_norm": 1.3439286947250366, + "learning_rate": 1.4490344496326463e-05, + "loss": 0.2012, + "step": 470 + }, + { + "epoch": 1.6267123287671232, + "grad_norm": 1.223298192024231, + "learning_rate": 1.4207419200373942e-05, + "loss": 0.1759, + "step": 475 + }, + { + "epoch": 1.643835616438356, + "grad_norm": 1.1850194931030273, + "learning_rate": 1.3924776407866634e-05, + "loss": 0.1852, + "step": 480 + }, + { + "epoch": 1.660958904109589, + "grad_norm": 1.3328778743743896, + "learning_rate": 1.3642516862555433e-05, + "loss": 0.1941, + "step": 485 + }, + { + "epoch": 1.678082191780822, + "grad_norm": 1.4503108263015747, + "learning_rate": 1.3360741171588578e-05, + "loss": 0.1824, + "step": 490 + }, + { + "epoch": 1.6952054794520548, + "grad_norm": 1.3191362619400024, + "learning_rate": 1.3079549769651737e-05, + "loss": 0.237, + "step": 495 + }, + { + "epoch": 1.7123287671232876, + "grad_norm": 1.2299200296401978, + "learning_rate": 1.2799042883169576e-05, + "loss": 0.2111, + "step": 500 + }, + { + "epoch": 1.7294520547945207, + "grad_norm": 1.14627206325531, + "learning_rate": 1.2519320494581581e-05, + "loss": 0.1846, + "step": 505 + }, + { + "epoch": 1.7465753424657535, + "grad_norm": 1.3819398880004883, + "learning_rate": 1.2240482306704831e-05, + "loss": 0.1884, + "step": 510 + }, + { + "epoch": 1.7636986301369864, + "grad_norm": 1.0577572584152222, + "learning_rate": 1.1962627707196407e-05, + "loss": 0.1891, + "step": 515 + }, + { + "epoch": 1.7808219178082192, + "grad_norm": 1.37679123878479, + "learning_rate": 1.1685855733128203e-05, + "loss": 0.1969, + "step": 520 + }, + { + "epoch": 1.797945205479452, + "grad_norm": 1.2934480905532837, + "learning_rate": 1.1410265035686639e-05, + "loss": 0.1747, + "step": 525 + }, + { + "epoch": 1.8150684931506849, + "grad_norm": 1.1471205949783325, + "learning_rate": 1.1135953845009914e-05, + "loss": 0.162, + "step": 530 + }, + { + "epoch": 1.8321917808219177, + "grad_norm": 1.1978737115859985, + "learning_rate": 1.0863019935175415e-05, + "loss": 0.1698, + "step": 535 + }, + { + "epoch": 1.8493150684931505, + "grad_norm": 1.2841060161590576, + "learning_rate": 1.0591560589349568e-05, + "loss": 0.1717, + "step": 540 + }, + { + "epoch": 1.8664383561643836, + "grad_norm": 1.1671046018600464, + "learning_rate": 1.0321672565112767e-05, + "loss": 0.1688, + "step": 545 + }, + { + "epoch": 1.8835616438356164, + "grad_norm": 1.4171589612960815, + "learning_rate": 1.0053452059971555e-05, + "loss": 0.1483, + "step": 550 + }, + { + "epoch": 1.9006849315068495, + "grad_norm": 1.045549988746643, + "learning_rate": 9.786994677070523e-06, + "loss": 0.1567, + "step": 555 + }, + { + "epoch": 1.9178082191780823, + "grad_norm": 1.203100562095642, + "learning_rate": 9.52239539111598e-06, + "loss": 0.1592, + "step": 560 + }, + { + "epoch": 1.9349315068493151, + "grad_norm": 1.1883440017700195, + "learning_rate": 9.259748514523654e-06, + "loss": 0.1441, + "step": 565 + }, + { + "epoch": 1.952054794520548, + "grad_norm": 1.2037888765335083, + "learning_rate": 8.999147663802494e-06, + "loss": 0.1477, + "step": 570 + }, + { + "epoch": 1.9691780821917808, + "grad_norm": 1.1345595121383667, + "learning_rate": 8.740685726186445e-06, + "loss": 0.1487, + "step": 575 + }, + { + "epoch": 1.9863013698630136, + "grad_norm": 1.1282062530517578, + "learning_rate": 8.484454826526199e-06, + "loss": 0.1511, + "step": 580 + }, + { + "epoch": 2.0034246575342465, + "grad_norm": 0.9774717092514038, + "learning_rate": 8.2305462944527e-06, + "loss": 0.1568, + "step": 585 + }, + { + "epoch": 2.0205479452054793, + "grad_norm": 1.0044071674346924, + "learning_rate": 7.979050631824074e-06, + "loss": 0.1284, + "step": 590 + }, + { + "epoch": 2.037671232876712, + "grad_norm": 0.9805011749267578, + "learning_rate": 7.730057480467604e-06, + "loss": 0.1198, + "step": 595 + }, + { + "epoch": 2.0547945205479454, + "grad_norm": 0.8624033331871033, + "learning_rate": 7.4836555902282534e-06, + "loss": 0.1109, + "step": 600 + }, + { + "epoch": 2.0719178082191783, + "grad_norm": 0.9508503079414368, + "learning_rate": 7.239932787335147e-06, + "loss": 0.1308, + "step": 605 + }, + { + "epoch": 2.089041095890411, + "grad_norm": 0.8774089217185974, + "learning_rate": 6.9989759430972105e-06, + "loss": 0.107, + "step": 610 + }, + { + "epoch": 2.106164383561644, + "grad_norm": 0.9493650197982788, + "learning_rate": 6.760870942939202e-06, + "loss": 0.1114, + "step": 615 + }, + { + "epoch": 2.1232876712328768, + "grad_norm": 1.0636200904846191, + "learning_rate": 6.525702655789201e-06, + "loss": 0.1047, + "step": 620 + }, + { + "epoch": 2.1404109589041096, + "grad_norm": 1.0461673736572266, + "learning_rate": 6.293554903828302e-06, + "loss": 0.0981, + "step": 625 + }, + { + "epoch": 2.1575342465753424, + "grad_norm": 0.9943008422851562, + "learning_rate": 6.0645104326135e-06, + "loss": 0.1044, + "step": 630 + }, + { + "epoch": 2.1746575342465753, + "grad_norm": 1.4208968877792358, + "learning_rate": 5.8386508815842746e-06, + "loss": 0.108, + "step": 635 + }, + { + "epoch": 2.191780821917808, + "grad_norm": 0.9838569164276123, + "learning_rate": 5.61605675496345e-06, + "loss": 0.104, + "step": 640 + }, + { + "epoch": 2.208904109589041, + "grad_norm": 1.0222171545028687, + "learning_rate": 5.396807393062681e-06, + "loss": 0.094, + "step": 645 + }, + { + "epoch": 2.2260273972602738, + "grad_norm": 1.0453369617462158, + "learning_rate": 5.180980944002799e-06, + "loss": 0.1202, + "step": 650 + }, + { + "epoch": 2.243150684931507, + "grad_norm": 1.3638309240341187, + "learning_rate": 4.9686543358590934e-06, + "loss": 0.1034, + "step": 655 + }, + { + "epoch": 2.26027397260274, + "grad_norm": 0.9355936646461487, + "learning_rate": 4.759903249241464e-06, + "loss": 0.0948, + "step": 660 + }, + { + "epoch": 2.2773972602739727, + "grad_norm": 1.8155180215835571, + "learning_rate": 4.554802090319209e-06, + "loss": 0.0982, + "step": 665 + }, + { + "epoch": 2.2945205479452055, + "grad_norm": 0.9513065218925476, + "learning_rate": 4.353423964300074e-06, + "loss": 0.0979, + "step": 670 + }, + { + "epoch": 2.3116438356164384, + "grad_norm": 0.9023252725601196, + "learning_rate": 4.155840649373015e-06, + "loss": 0.1096, + "step": 675 + }, + { + "epoch": 2.328767123287671, + "grad_norm": 1.0558077096939087, + "learning_rate": 3.96212257112391e-06, + "loss": 0.0847, + "step": 680 + }, + { + "epoch": 2.345890410958904, + "grad_norm": 1.0877296924591064, + "learning_rate": 3.772338777433482e-06, + "loss": 0.0989, + "step": 685 + }, + { + "epoch": 2.363013698630137, + "grad_norm": 0.9272650480270386, + "learning_rate": 3.5865569138661814e-06, + "loss": 0.0941, + "step": 690 + }, + { + "epoch": 2.3801369863013697, + "grad_norm": 0.8795062899589539, + "learning_rate": 3.4048431995589453e-06, + "loss": 0.0919, + "step": 695 + }, + { + "epoch": 2.3972602739726026, + "grad_norm": 0.788815975189209, + "learning_rate": 3.22726240361843e-06, + "loss": 0.094, + "step": 700 + }, + { + "epoch": 2.4143835616438354, + "grad_norm": 0.9455450177192688, + "learning_rate": 3.053877822034995e-06, + "loss": 0.0823, + "step": 705 + }, + { + "epoch": 2.4315068493150687, + "grad_norm": 0.8720506429672241, + "learning_rate": 2.884751255121827e-06, + "loss": 0.0902, + "step": 710 + }, + { + "epoch": 2.4486301369863015, + "grad_norm": 0.7250347137451172, + "learning_rate": 2.7199429854871544e-06, + "loss": 0.0849, + "step": 715 + }, + { + "epoch": 2.4657534246575343, + "grad_norm": 0.8255820274353027, + "learning_rate": 2.559511756547407e-06, + "loss": 0.0906, + "step": 720 + }, + { + "epoch": 2.482876712328767, + "grad_norm": 1.0412914752960205, + "learning_rate": 2.403514751589032e-06, + "loss": 0.0956, + "step": 725 + }, + { + "epoch": 2.5, + "grad_norm": 0.9044472575187683, + "learning_rate": 2.252007573386365e-06, + "loss": 0.1005, + "step": 730 + }, + { + "epoch": 2.517123287671233, + "grad_norm": 0.8041086792945862, + "learning_rate": 2.105044224382854e-06, + "loss": 0.0885, + "step": 735 + }, + { + "epoch": 2.5342465753424657, + "grad_norm": 0.8393188714981079, + "learning_rate": 1.9626770874427368e-06, + "loss": 0.088, + "step": 740 + }, + { + "epoch": 2.5513698630136985, + "grad_norm": 0.7377734780311584, + "learning_rate": 1.8249569071799134e-06, + "loss": 0.0926, + "step": 745 + }, + { + "epoch": 2.5684931506849313, + "grad_norm": 0.7328418493270874, + "learning_rate": 1.69193277187083e-06, + "loss": 0.0796, + "step": 750 + }, + { + "epoch": 2.5856164383561646, + "grad_norm": 0.8020691275596619, + "learning_rate": 1.5636520959577094e-06, + "loss": 0.0964, + "step": 755 + }, + { + "epoch": 2.602739726027397, + "grad_norm": 0.6919242143630981, + "learning_rate": 1.44016060314835e-06, + "loss": 0.0729, + "step": 760 + }, + { + "epoch": 2.6198630136986303, + "grad_norm": 0.8454508781433105, + "learning_rate": 1.321502310118649e-06, + "loss": 0.0791, + "step": 765 + }, + { + "epoch": 2.636986301369863, + "grad_norm": 0.7785512208938599, + "learning_rate": 1.2077195108234934e-06, + "loss": 0.092, + "step": 770 + }, + { + "epoch": 2.654109589041096, + "grad_norm": 0.8322052359580994, + "learning_rate": 1.098852761421719e-06, + "loss": 0.0848, + "step": 775 + }, + { + "epoch": 2.671232876712329, + "grad_norm": 0.7372844219207764, + "learning_rate": 9.949408658205072e-07, + "loss": 0.0793, + "step": 780 + }, + { + "epoch": 2.6883561643835616, + "grad_norm": 0.7700328826904297, + "learning_rate": 8.960208618442883e-07, + "loss": 0.0742, + "step": 785 + }, + { + "epoch": 2.7054794520547945, + "grad_norm": 0.7672784328460693, + "learning_rate": 8.021280080331816e-07, + "loss": 0.0921, + "step": 790 + }, + { + "epoch": 2.7226027397260273, + "grad_norm": 0.7001596689224243, + "learning_rate": 7.132957710756277e-07, + "loss": 0.0928, + "step": 795 + }, + { + "epoch": 2.73972602739726, + "grad_norm": 0.7488632202148438, + "learning_rate": 6.295558138796803e-07, + "loss": 0.0687, + "step": 800 + }, + { + "epoch": 2.756849315068493, + "grad_norm": 0.7398976683616638, + "learning_rate": 5.509379842872558e-07, + "loss": 0.0832, + "step": 805 + }, + { + "epoch": 2.7739726027397262, + "grad_norm": 0.7685964703559875, + "learning_rate": 4.774703044353051e-07, + "loss": 0.0727, + "step": 810 + }, + { + "epoch": 2.791095890410959, + "grad_norm": 0.7078747153282166, + "learning_rate": 4.091789607677582e-07, + "loss": 0.0799, + "step": 815 + }, + { + "epoch": 2.808219178082192, + "grad_norm": 0.9180399179458618, + "learning_rate": 3.460882947017635e-07, + "loss": 0.0803, + "step": 820 + }, + { + "epoch": 2.8253424657534247, + "grad_norm": 0.7753926515579224, + "learning_rate": 2.8822079395154357e-07, + "loss": 0.0709, + "step": 825 + }, + { + "epoch": 2.8424657534246576, + "grad_norm": 0.7497108578681946, + "learning_rate": 2.3559708451300622e-07, + "loss": 0.0773, + "step": 830 + }, + { + "epoch": 2.8595890410958904, + "grad_norm": 0.6873535513877869, + "learning_rate": 1.8823592331191242e-07, + "loss": 0.0815, + "step": 835 + }, + { + "epoch": 2.8767123287671232, + "grad_norm": 0.6233852505683899, + "learning_rate": 1.4615419151824406e-07, + "loss": 0.072, + "step": 840 + }, + { + "epoch": 2.893835616438356, + "grad_norm": 0.9000762701034546, + "learning_rate": 1.0936688852919042e-07, + "loss": 0.0877, + "step": 845 + }, + { + "epoch": 2.910958904109589, + "grad_norm": 0.8863382935523987, + "learning_rate": 7.788712662281317e-08, + "loss": 0.0856, + "step": 850 + }, + { + "epoch": 2.928082191780822, + "grad_norm": 0.9054489731788635, + "learning_rate": 5.1726126284389886e-08, + "loss": 0.0831, + "step": 855 + }, + { + "epoch": 2.9452054794520546, + "grad_norm": 0.6619481444358826, + "learning_rate": 3.0893212207036556e-08, + "loss": 0.0852, + "step": 860 + }, + { + "epoch": 2.962328767123288, + "grad_norm": 0.7987244129180908, + "learning_rate": 1.5395809968061226e-08, + "loss": 0.0868, + "step": 865 + }, + { + "epoch": 2.9794520547945207, + "grad_norm": 0.8010684251785278, + "learning_rate": 5.239443382229481e-09, + "loss": 0.0753, + "step": 870 + }, + { + "epoch": 2.9965753424657535, + "grad_norm": 0.6669439077377319, + "learning_rate": 4.277325328860826e-10, + "loss": 0.0928, + "step": 875 + }, + { + "epoch": 3.0, + "step": 876, + "total_flos": 1.0513325630265754e+18, + "train_loss": 0.41264643938574075, + "train_runtime": 851.0131, + "train_samples_per_second": 32.929, + "train_steps_per_second": 1.029 + } + ], + "logging_steps": 5, + "max_steps": 876, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0513325630265754e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..952205c08999462b672a8c9b09a9757391f3163a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/18_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd399c3290192caf26c0e41449e39c9e07ffa915adfbdfcac68f6e44af9efd01 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ad15617bb1c40c13183527b9c94486e7dd31a31 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 19_128_e3_3e-5 + results: [] +--- + + + +# 19_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddd7c8eaaf6b82cc49fa52b0d6f4d63a55fb4443 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "down_proj", + "k_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8bb782d82794cf9b1ceca4d815d54107c39fc78 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5564712ce2e943f40d8f3cecab3718a1bad4cdc7e445e3e7e685bb771da547bc +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..34b4de1f1d988808b438d8c786fc021d850e5e3f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4339270676987249e+18, + "train_loss": 0.40115166138461295, + "train_runtime": 1136.1457, + "train_samples": 12032, + "train_samples_per_second": 31.771, + "train_steps_per_second": 0.993 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..34b4de1f1d988808b438d8c786fc021d850e5e3f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4339270676987249e+18, + "train_loss": 0.40115166138461295, + "train_runtime": 1136.1457, + "train_samples": 12032, + "train_samples_per_second": 31.771, + "train_steps_per_second": 0.993 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5453eab9467a56211542d5d2189e268322f70bd9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1618 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1128, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013297872340425532, + "grad_norm": 0.6227388381958008, + "learning_rate": 2.1052631578947366e-06, + "loss": 1.455, + "step": 5 + }, + { + "epoch": 0.026595744680851064, + "grad_norm": 0.641569972038269, + "learning_rate": 4.736842105263158e-06, + "loss": 1.4928, + "step": 10 + }, + { + "epoch": 0.0398936170212766, + "grad_norm": 0.5281710028648376, + "learning_rate": 7.3684210526315784e-06, + "loss": 1.4508, + "step": 15 + }, + { + "epoch": 0.05319148936170213, + "grad_norm": 0.49616628885269165, + "learning_rate": 9.999999999999999e-06, + "loss": 1.4719, + "step": 20 + }, + { + "epoch": 0.06648936170212766, + "grad_norm": 0.49781522154808044, + "learning_rate": 1.263157894736842e-05, + "loss": 1.3889, + "step": 25 + }, + { + "epoch": 0.0797872340425532, + "grad_norm": 0.4703419804573059, + "learning_rate": 1.5263157894736842e-05, + "loss": 1.3944, + "step": 30 + }, + { + "epoch": 0.09308510638297872, + "grad_norm": 0.4666573703289032, + "learning_rate": 1.7894736842105264e-05, + "loss": 1.4003, + "step": 35 + }, + { + "epoch": 0.10638297872340426, + "grad_norm": 0.5086567401885986, + "learning_rate": 2.0526315789473685e-05, + "loss": 1.3457, + "step": 40 + }, + { + "epoch": 0.1196808510638298, + "grad_norm": 0.48269549012184143, + "learning_rate": 2.3157894736842103e-05, + "loss": 1.3209, + "step": 45 + }, + { + "epoch": 0.13297872340425532, + "grad_norm": 0.5147978067398071, + "learning_rate": 2.578947368421053e-05, + "loss": 1.3381, + "step": 50 + }, + { + "epoch": 0.14627659574468085, + "grad_norm": 0.4784543514251709, + "learning_rate": 2.8421052631578946e-05, + "loss": 1.2557, + "step": 55 + }, + { + "epoch": 0.1595744680851064, + "grad_norm": 0.5767329931259155, + "learning_rate": 2.9999741868614275e-05, + "loss": 1.2752, + "step": 60 + }, + { + "epoch": 0.17287234042553193, + "grad_norm": 0.5884501338005066, + "learning_rate": 2.999683799255387e-05, + "loss": 1.2227, + "step": 65 + }, + { + "epoch": 0.18617021276595744, + "grad_norm": 0.5452523827552795, + "learning_rate": 2.9990708202925038e-05, + "loss": 1.2652, + "step": 70 + }, + { + "epoch": 0.19946808510638298, + "grad_norm": 0.5928755402565002, + "learning_rate": 2.9981353818283835e-05, + "loss": 1.2061, + "step": 75 + }, + { + "epoch": 0.2127659574468085, + "grad_norm": 0.6125819087028503, + "learning_rate": 2.996877685081685e-05, + "loss": 1.2386, + "step": 80 + }, + { + "epoch": 0.22606382978723405, + "grad_norm": 0.6594212055206299, + "learning_rate": 2.995298000590839e-05, + "loss": 1.1788, + "step": 85 + }, + { + "epoch": 0.2393617021276596, + "grad_norm": 0.6206346154212952, + "learning_rate": 2.99339666815585e-05, + "loss": 1.1339, + "step": 90 + }, + { + "epoch": 0.2526595744680851, + "grad_norm": 0.6499509215354919, + "learning_rate": 2.9911740967652065e-05, + "loss": 1.127, + "step": 95 + }, + { + "epoch": 0.26595744680851063, + "grad_norm": 0.8049723505973816, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.1297, + "step": 100 + }, + { + "epoch": 0.27925531914893614, + "grad_norm": 0.7181145548820496, + "learning_rate": 2.9857672184706038e-05, + "loss": 1.0865, + "step": 105 + }, + { + "epoch": 0.2925531914893617, + "grad_norm": 0.7494776844978333, + "learning_rate": 2.9825840746199534e-05, + "loss": 1.0488, + "step": 110 + }, + { + "epoch": 0.3058510638297872, + "grad_norm": 0.7208320498466492, + "learning_rate": 2.9790820176700872e-05, + "loss": 1.0167, + "step": 115 + }, + { + "epoch": 0.3191489361702128, + "grad_norm": 0.7929312586784363, + "learning_rate": 2.975261800935339e-05, + "loss": 1.0162, + "step": 120 + }, + { + "epoch": 0.3324468085106383, + "grad_norm": 0.8076171278953552, + "learning_rate": 2.971124246168202e-05, + "loss": 1.0307, + "step": 125 + }, + { + "epoch": 0.34574468085106386, + "grad_norm": 0.9051169753074646, + "learning_rate": 2.9666702433825614e-05, + "loss": 0.9968, + "step": 130 + }, + { + "epoch": 0.35904255319148937, + "grad_norm": 0.9477875828742981, + "learning_rate": 2.9619007506622506e-05, + "loss": 0.8826, + "step": 135 + }, + { + "epoch": 0.3723404255319149, + "grad_norm": 0.7885204553604126, + "learning_rate": 2.956816793954958e-05, + "loss": 0.9265, + "step": 140 + }, + { + "epoch": 0.38563829787234044, + "grad_norm": 0.930425763130188, + "learning_rate": 2.951419466851542e-05, + "loss": 0.9759, + "step": 145 + }, + { + "epoch": 0.39893617021276595, + "grad_norm": 0.7818400263786316, + "learning_rate": 2.9457099303507904e-05, + "loss": 0.9178, + "step": 150 + }, + { + "epoch": 0.4122340425531915, + "grad_norm": 0.813630223274231, + "learning_rate": 2.939689412609684e-05, + "loss": 0.8438, + "step": 155 + }, + { + "epoch": 0.425531914893617, + "grad_norm": 0.932170033454895, + "learning_rate": 2.9333592086792113e-05, + "loss": 0.8353, + "step": 160 + }, + { + "epoch": 0.43882978723404253, + "grad_norm": 1.1435537338256836, + "learning_rate": 2.9267206802257952e-05, + "loss": 0.9124, + "step": 165 + }, + { + "epoch": 0.4521276595744681, + "grad_norm": 0.9625189304351807, + "learning_rate": 2.919775255238392e-05, + "loss": 0.7868, + "step": 170 + }, + { + "epoch": 0.4654255319148936, + "grad_norm": 0.8912039995193481, + "learning_rate": 2.9125244277213176e-05, + "loss": 0.8799, + "step": 175 + }, + { + "epoch": 0.4787234042553192, + "grad_norm": 1.1171159744262695, + "learning_rate": 2.9049697573728818e-05, + "loss": 0.801, + "step": 180 + }, + { + "epoch": 0.4920212765957447, + "grad_norm": 0.9682158827781677, + "learning_rate": 2.8971128692498872e-05, + "loss": 0.7514, + "step": 185 + }, + { + "epoch": 0.5053191489361702, + "grad_norm": 0.9352660775184631, + "learning_rate": 2.8889554534180664e-05, + "loss": 0.7746, + "step": 190 + }, + { + "epoch": 0.5186170212765957, + "grad_norm": 0.9201658368110657, + "learning_rate": 2.8804992645885415e-05, + "loss": 0.7835, + "step": 195 + }, + { + "epoch": 0.5319148936170213, + "grad_norm": 1.0329482555389404, + "learning_rate": 2.8717461217403726e-05, + "loss": 0.7193, + "step": 200 + }, + { + "epoch": 0.5452127659574468, + "grad_norm": 1.3644397258758545, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.6963, + "step": 205 + }, + { + "epoch": 0.5585106382978723, + "grad_norm": 1.0713317394256592, + "learning_rate": 2.853356568882657e-05, + "loss": 0.6843, + "step": 210 + }, + { + "epoch": 0.5718085106382979, + "grad_norm": 1.0178943872451782, + "learning_rate": 2.843724114580848e-05, + "loss": 0.7079, + "step": 215 + }, + { + "epoch": 0.5851063829787234, + "grad_norm": 1.1269890069961548, + "learning_rate": 2.833802616824972e-05, + "loss": 0.7703, + "step": 220 + }, + { + "epoch": 0.598404255319149, + "grad_norm": 0.9868016242980957, + "learning_rate": 2.8235942097911964e-05, + "loss": 0.7118, + "step": 225 + }, + { + "epoch": 0.6117021276595744, + "grad_norm": 1.0691125392913818, + "learning_rate": 2.8131010893716676e-05, + "loss": 0.6994, + "step": 230 + }, + { + "epoch": 0.625, + "grad_norm": 0.9782646894454956, + "learning_rate": 2.8023255127021593e-05, + "loss": 0.6715, + "step": 235 + }, + { + "epoch": 0.6382978723404256, + "grad_norm": 1.07344388961792, + "learning_rate": 2.7912697976765516e-05, + "loss": 0.6679, + "step": 240 + }, + { + "epoch": 0.651595744680851, + "grad_norm": 1.089022159576416, + "learning_rate": 2.7799363224482334e-05, + "loss": 0.635, + "step": 245 + }, + { + "epoch": 0.6648936170212766, + "grad_norm": 1.225973129272461, + "learning_rate": 2.7683275249185507e-05, + "loss": 0.5731, + "step": 250 + }, + { + "epoch": 0.6781914893617021, + "grad_norm": 1.1575313806533813, + "learning_rate": 2.7564459022123953e-05, + "loss": 0.6353, + "step": 255 + }, + { + "epoch": 0.6914893617021277, + "grad_norm": 1.1288249492645264, + "learning_rate": 2.744294010141061e-05, + "loss": 0.6047, + "step": 260 + }, + { + "epoch": 0.7047872340425532, + "grad_norm": 1.089603304862976, + "learning_rate": 2.7318744626524704e-05, + "loss": 0.6118, + "step": 265 + }, + { + "epoch": 0.7180851063829787, + "grad_norm": 1.0996863842010498, + "learning_rate": 2.719189931268899e-05, + "loss": 0.6087, + "step": 270 + }, + { + "epoch": 0.7313829787234043, + "grad_norm": 1.1740212440490723, + "learning_rate": 2.7062431445123127e-05, + "loss": 0.6124, + "step": 275 + }, + { + "epoch": 0.7446808510638298, + "grad_norm": 1.1370967626571655, + "learning_rate": 2.6930368873174493e-05, + "loss": 0.5852, + "step": 280 + }, + { + "epoch": 0.7579787234042553, + "grad_norm": 1.3930827379226685, + "learning_rate": 2.6795740004327584e-05, + "loss": 0.625, + "step": 285 + }, + { + "epoch": 0.7712765957446809, + "grad_norm": 1.5879627466201782, + "learning_rate": 2.665857379809338e-05, + "loss": 0.6212, + "step": 290 + }, + { + "epoch": 0.7845744680851063, + "grad_norm": 1.1873257160186768, + "learning_rate": 2.6518899759780017e-05, + "loss": 0.5377, + "step": 295 + }, + { + "epoch": 0.7978723404255319, + "grad_norm": 1.2386993169784546, + "learning_rate": 2.637674793414596e-05, + "loss": 0.5768, + "step": 300 + }, + { + "epoch": 0.8111702127659575, + "grad_norm": 1.0962989330291748, + "learning_rate": 2.6232148898937223e-05, + "loss": 0.544, + "step": 305 + }, + { + "epoch": 0.824468085106383, + "grad_norm": 1.1933698654174805, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.5176, + "step": 310 + }, + { + "epoch": 0.8377659574468085, + "grad_norm": 1.299896478652954, + "learning_rate": 2.5935734136139407e-05, + "loss": 0.5098, + "step": 315 + }, + { + "epoch": 0.851063829787234, + "grad_norm": 1.0959054231643677, + "learning_rate": 2.5783982169218125e-05, + "loss": 0.5266, + "step": 320 + }, + { + "epoch": 0.8643617021276596, + "grad_norm": 1.2663919925689697, + "learning_rate": 2.5629910500342424e-05, + "loss": 0.4886, + "step": 325 + }, + { + "epoch": 0.8776595744680851, + "grad_norm": 1.6360260248184204, + "learning_rate": 2.5473552271291092e-05, + "loss": 0.4867, + "step": 330 + }, + { + "epoch": 0.8909574468085106, + "grad_norm": 1.2540385723114014, + "learning_rate": 2.531494111569629e-05, + "loss": 0.4562, + "step": 335 + }, + { + "epoch": 0.9042553191489362, + "grad_norm": 1.2848246097564697, + "learning_rate": 2.5154111151808752e-05, + "loss": 0.504, + "step": 340 + }, + { + "epoch": 0.9175531914893617, + "grad_norm": 1.1443147659301758, + "learning_rate": 2.4991096975158757e-05, + "loss": 0.4646, + "step": 345 + }, + { + "epoch": 0.9308510638297872, + "grad_norm": 1.1614288091659546, + "learning_rate": 2.4825933651114375e-05, + "loss": 0.5083, + "step": 350 + }, + { + "epoch": 0.9441489361702128, + "grad_norm": 1.2904808521270752, + "learning_rate": 2.4658656707338733e-05, + "loss": 0.5036, + "step": 355 + }, + { + "epoch": 0.9574468085106383, + "grad_norm": 1.3155778646469116, + "learning_rate": 2.4489302126147768e-05, + "loss": 0.4637, + "step": 360 + }, + { + "epoch": 0.9707446808510638, + "grad_norm": 1.2170337438583374, + "learning_rate": 2.431790633677019e-05, + "loss": 0.4673, + "step": 365 + }, + { + "epoch": 0.9840425531914894, + "grad_norm": 1.1918988227844238, + "learning_rate": 2.414450620751136e-05, + "loss": 0.4542, + "step": 370 + }, + { + "epoch": 0.9973404255319149, + "grad_norm": 1.5364383459091187, + "learning_rate": 2.396913903782268e-05, + "loss": 0.4779, + "step": 375 + }, + { + "epoch": 1.0106382978723405, + "grad_norm": 1.2183704376220703, + "learning_rate": 2.379184255027822e-05, + "loss": 0.3732, + "step": 380 + }, + { + "epoch": 1.023936170212766, + "grad_norm": 1.1790411472320557, + "learning_rate": 2.361265488246039e-05, + "loss": 0.3663, + "step": 385 + }, + { + "epoch": 1.0372340425531914, + "grad_norm": 1.1400433778762817, + "learning_rate": 2.3431614578756304e-05, + "loss": 0.4003, + "step": 390 + }, + { + "epoch": 1.050531914893617, + "grad_norm": 1.1990783214569092, + "learning_rate": 2.3248760582066605e-05, + "loss": 0.3457, + "step": 395 + }, + { + "epoch": 1.0638297872340425, + "grad_norm": 1.1359471082687378, + "learning_rate": 2.306413222542866e-05, + "loss": 0.4014, + "step": 400 + }, + { + "epoch": 1.077127659574468, + "grad_norm": 1.0940519571304321, + "learning_rate": 2.287776922355573e-05, + "loss": 0.3591, + "step": 405 + }, + { + "epoch": 1.0904255319148937, + "grad_norm": 1.167232632637024, + "learning_rate": 2.268971166429412e-05, + "loss": 0.354, + "step": 410 + }, + { + "epoch": 1.1037234042553192, + "grad_norm": 1.3916363716125488, + "learning_rate": 2.25e-05, + "loss": 0.3234, + "step": 415 + }, + { + "epoch": 1.1170212765957448, + "grad_norm": 1.1360234022140503, + "learning_rate": 2.2308675038837887e-05, + "loss": 0.376, + "step": 420 + }, + { + "epoch": 1.1303191489361701, + "grad_norm": 1.4370903968811035, + "learning_rate": 2.2115777936002533e-05, + "loss": 0.3581, + "step": 425 + }, + { + "epoch": 1.1436170212765957, + "grad_norm": 1.2480677366256714, + "learning_rate": 2.192135018486618e-05, + "loss": 0.3309, + "step": 430 + }, + { + "epoch": 1.1569148936170213, + "grad_norm": 1.2043204307556152, + "learning_rate": 2.172543360805308e-05, + "loss": 0.3736, + "step": 435 + }, + { + "epoch": 1.1702127659574468, + "grad_norm": 1.193558931350708, + "learning_rate": 2.152807034844322e-05, + "loss": 0.3187, + "step": 440 + }, + { + "epoch": 1.1835106382978724, + "grad_norm": 1.4606080055236816, + "learning_rate": 2.1329302860107065e-05, + "loss": 0.374, + "step": 445 + }, + { + "epoch": 1.196808510638298, + "grad_norm": 1.1718413829803467, + "learning_rate": 2.1129173899173474e-05, + "loss": 0.3025, + "step": 450 + }, + { + "epoch": 1.2101063829787235, + "grad_norm": 1.1787132024765015, + "learning_rate": 2.0927726514632557e-05, + "loss": 0.2935, + "step": 455 + }, + { + "epoch": 1.2234042553191489, + "grad_norm": 1.1560717821121216, + "learning_rate": 2.072500403907559e-05, + "loss": 0.2843, + "step": 460 + }, + { + "epoch": 1.2367021276595744, + "grad_norm": 1.2688570022583008, + "learning_rate": 2.0521050079373895e-05, + "loss": 0.2622, + "step": 465 + }, + { + "epoch": 1.25, + "grad_norm": 1.1296700239181519, + "learning_rate": 2.0315908507298713e-05, + "loss": 0.2949, + "step": 470 + }, + { + "epoch": 1.2632978723404256, + "grad_norm": 1.1518288850784302, + "learning_rate": 2.0109623450084154e-05, + "loss": 0.2932, + "step": 475 + }, + { + "epoch": 1.2765957446808511, + "grad_norm": 1.132055401802063, + "learning_rate": 1.990223928093511e-05, + "loss": 0.3031, + "step": 480 + }, + { + "epoch": 1.2898936170212765, + "grad_norm": 1.2940478324890137, + "learning_rate": 1.9693800609482318e-05, + "loss": 0.284, + "step": 485 + }, + { + "epoch": 1.3031914893617023, + "grad_norm": 1.2268915176391602, + "learning_rate": 1.9484352272186555e-05, + "loss": 0.3097, + "step": 490 + }, + { + "epoch": 1.3164893617021276, + "grad_norm": 1.1628766059875488, + "learning_rate": 1.9273939322694035e-05, + "loss": 0.3328, + "step": 495 + }, + { + "epoch": 1.3297872340425532, + "grad_norm": 1.1903403997421265, + "learning_rate": 1.906260702214508e-05, + "loss": 0.2862, + "step": 500 + }, + { + "epoch": 1.3430851063829787, + "grad_norm": 1.2326576709747314, + "learning_rate": 1.8850400829438157e-05, + "loss": 0.2785, + "step": 505 + }, + { + "epoch": 1.3563829787234043, + "grad_norm": 1.3859754800796509, + "learning_rate": 1.8637366391451414e-05, + "loss": 0.2997, + "step": 510 + }, + { + "epoch": 1.3696808510638299, + "grad_norm": 1.1462424993515015, + "learning_rate": 1.842354953322373e-05, + "loss": 0.2877, + "step": 515 + }, + { + "epoch": 1.3829787234042552, + "grad_norm": 1.1558425426483154, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.2646, + "step": 520 + }, + { + "epoch": 1.3962765957446808, + "grad_norm": 1.315887212753296, + "learning_rate": 1.7993752687825003e-05, + "loss": 0.2403, + "step": 525 + }, + { + "epoch": 1.4095744680851063, + "grad_norm": 1.1123799085617065, + "learning_rate": 1.777786515264123e-05, + "loss": 0.2723, + "step": 530 + }, + { + "epoch": 1.422872340425532, + "grad_norm": 1.2034995555877686, + "learning_rate": 1.7561380081304063e-05, + "loss": 0.293, + "step": 535 + }, + { + "epoch": 1.4361702127659575, + "grad_norm": 1.5328879356384277, + "learning_rate": 1.7344344041105177e-05, + "loss": 0.2588, + "step": 540 + }, + { + "epoch": 1.449468085106383, + "grad_norm": 1.2087551355361938, + "learning_rate": 1.7126803717853086e-05, + "loss": 0.272, + "step": 545 + }, + { + "epoch": 1.4627659574468086, + "grad_norm": 1.1468896865844727, + "learning_rate": 1.6908805905830752e-05, + "loss": 0.2367, + "step": 550 + }, + { + "epoch": 1.476063829787234, + "grad_norm": 1.2406046390533447, + "learning_rate": 1.6690397497729818e-05, + "loss": 0.3036, + "step": 555 + }, + { + "epoch": 1.4893617021276595, + "grad_norm": 1.2381621599197388, + "learning_rate": 1.647162547456372e-05, + "loss": 0.2535, + "step": 560 + }, + { + "epoch": 1.502659574468085, + "grad_norm": 1.0908373594284058, + "learning_rate": 1.6252536895561754e-05, + "loss": 0.2683, + "step": 565 + }, + { + "epoch": 1.5159574468085106, + "grad_norm": 1.1827088594436646, + "learning_rate": 1.6033178888046368e-05, + "loss": 0.2693, + "step": 570 + }, + { + "epoch": 1.5292553191489362, + "grad_norm": 1.1324365139007568, + "learning_rate": 1.5813598637295767e-05, + "loss": 0.2379, + "step": 575 + }, + { + "epoch": 1.5425531914893615, + "grad_norm": 1.1748486757278442, + "learning_rate": 1.5593843376394043e-05, + "loss": 0.2096, + "step": 580 + }, + { + "epoch": 1.5558510638297873, + "grad_norm": 1.440712571144104, + "learning_rate": 1.5373960376071095e-05, + "loss": 0.2292, + "step": 585 + }, + { + "epoch": 1.5691489361702127, + "grad_norm": 1.2117944955825806, + "learning_rate": 1.515399693453435e-05, + "loss": 0.2003, + "step": 590 + }, + { + "epoch": 1.5824468085106385, + "grad_norm": 1.3705322742462158, + "learning_rate": 1.493400036729465e-05, + "loss": 0.2297, + "step": 595 + }, + { + "epoch": 1.5957446808510638, + "grad_norm": 1.1308857202529907, + "learning_rate": 1.4714017996988384e-05, + "loss": 0.2363, + "step": 600 + }, + { + "epoch": 1.6090425531914894, + "grad_norm": 1.2247810363769531, + "learning_rate": 1.4494097143198083e-05, + "loss": 0.2037, + "step": 605 + }, + { + "epoch": 1.622340425531915, + "grad_norm": 1.100001573562622, + "learning_rate": 1.4274285112273701e-05, + "loss": 0.2058, + "step": 610 + }, + { + "epoch": 1.6356382978723403, + "grad_norm": 1.1288983821868896, + "learning_rate": 1.4054629187156702e-05, + "loss": 0.2101, + "step": 615 + }, + { + "epoch": 1.648936170212766, + "grad_norm": 1.2534788846969604, + "learning_rate": 1.3835176617209241e-05, + "loss": 0.2093, + "step": 620 + }, + { + "epoch": 1.6622340425531914, + "grad_norm": 1.0175725221633911, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.2049, + "step": 625 + }, + { + "epoch": 1.675531914893617, + "grad_norm": 1.0500127077102661, + "learning_rate": 1.3397070311402377e-05, + "loss": 0.1919, + "step": 630 + }, + { + "epoch": 1.6888297872340425, + "grad_norm": 1.2308189868927002, + "learning_rate": 1.3178510814947112e-05, + "loss": 0.2182, + "step": 635 + }, + { + "epoch": 1.702127659574468, + "grad_norm": 1.12785005569458, + "learning_rate": 1.296034313219816e-05, + "loss": 0.1814, + "step": 640 + }, + { + "epoch": 1.7154255319148937, + "grad_norm": 1.1110854148864746, + "learning_rate": 1.2742614192387417e-05, + "loss": 0.1865, + "step": 645 + }, + { + "epoch": 1.728723404255319, + "grad_norm": 1.2259209156036377, + "learning_rate": 1.2525370830370447e-05, + "loss": 0.1987, + "step": 650 + }, + { + "epoch": 1.7420212765957448, + "grad_norm": 1.1777424812316895, + "learning_rate": 1.2308659776551985e-05, + "loss": 0.1799, + "step": 655 + }, + { + "epoch": 1.7553191489361701, + "grad_norm": 1.321239709854126, + "learning_rate": 1.209252764683395e-05, + "loss": 0.1862, + "step": 660 + }, + { + "epoch": 1.7686170212765957, + "grad_norm": 1.2250465154647827, + "learning_rate": 1.1877020932588067e-05, + "loss": 0.1765, + "step": 665 + }, + { + "epoch": 1.7819148936170213, + "grad_norm": 1.1053019762039185, + "learning_rate": 1.1662185990655285e-05, + "loss": 0.1529, + "step": 670 + }, + { + "epoch": 1.7952127659574468, + "grad_norm": 1.353917121887207, + "learning_rate": 1.1448069033374135e-05, + "loss": 0.2006, + "step": 675 + }, + { + "epoch": 1.8085106382978724, + "grad_norm": 1.2655768394470215, + "learning_rate": 1.1234716118640149e-05, + "loss": 0.1817, + "step": 680 + }, + { + "epoch": 1.8218085106382977, + "grad_norm": 1.2838915586471558, + "learning_rate": 1.1022173139998556e-05, + "loss": 0.1755, + "step": 685 + }, + { + "epoch": 1.8351063829787235, + "grad_norm": 1.1840317249298096, + "learning_rate": 1.0810485816772251e-05, + "loss": 0.1841, + "step": 690 + }, + { + "epoch": 1.8484042553191489, + "grad_norm": 1.1475826501846313, + "learning_rate": 1.0599699684227313e-05, + "loss": 0.181, + "step": 695 + }, + { + "epoch": 1.8617021276595744, + "grad_norm": 1.2207974195480347, + "learning_rate": 1.0389860083778056e-05, + "loss": 0.1851, + "step": 700 + }, + { + "epoch": 1.875, + "grad_norm": 0.9976739883422852, + "learning_rate": 1.0181012153233851e-05, + "loss": 0.159, + "step": 705 + }, + { + "epoch": 1.8882978723404256, + "grad_norm": 1.080603837966919, + "learning_rate": 9.973200817089655e-06, + "loss": 0.1671, + "step": 710 + }, + { + "epoch": 1.9015957446808511, + "grad_norm": 1.3108693361282349, + "learning_rate": 9.7664707768625e-06, + "loss": 0.1568, + "step": 715 + }, + { + "epoch": 1.9148936170212765, + "grad_norm": 0.9820380806922913, + "learning_rate": 9.560866501475913e-06, + "loss": 0.1505, + "step": 720 + }, + { + "epoch": 1.9281914893617023, + "grad_norm": 1.1379504203796387, + "learning_rate": 9.35643221769436e-06, + "loss": 0.1496, + "step": 725 + }, + { + "epoch": 1.9414893617021276, + "grad_norm": 1.0877091884613037, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1463, + "step": 730 + }, + { + "epoch": 1.9547872340425532, + "grad_norm": 1.249118447303772, + "learning_rate": 8.951249264182403e-06, + "loss": 0.1753, + "step": 735 + }, + { + "epoch": 1.9680851063829787, + "grad_norm": 1.0539417266845703, + "learning_rate": 8.750587751837313e-06, + "loss": 0.1741, + "step": 740 + }, + { + "epoch": 1.9813829787234043, + "grad_norm": 1.1529797315597534, + "learning_rate": 8.551270527119784e-06, + "loss": 0.1542, + "step": 745 + }, + { + "epoch": 1.9946808510638299, + "grad_norm": 1.0125025510787964, + "learning_rate": 8.35334046441041e-06, + "loss": 0.1573, + "step": 750 + }, + { + "epoch": 2.007978723404255, + "grad_norm": 1.345078706741333, + "learning_rate": 8.156840139702554e-06, + "loss": 0.1299, + "step": 755 + }, + { + "epoch": 2.021276595744681, + "grad_norm": 0.9697524309158325, + "learning_rate": 7.961811821444008e-06, + "loss": 0.1177, + "step": 760 + }, + { + "epoch": 2.0345744680851063, + "grad_norm": 1.138466715812683, + "learning_rate": 7.768297461444766e-06, + "loss": 0.1301, + "step": 765 + }, + { + "epoch": 2.047872340425532, + "grad_norm": 1.03352689743042, + "learning_rate": 7.576338685852955e-06, + "loss": 0.1283, + "step": 770 + }, + { + "epoch": 2.0611702127659575, + "grad_norm": 0.9989078044891357, + "learning_rate": 7.385976786200765e-06, + "loss": 0.112, + "step": 775 + }, + { + "epoch": 2.074468085106383, + "grad_norm": 1.1039037704467773, + "learning_rate": 7.197252710522395e-06, + "loss": 0.1307, + "step": 780 + }, + { + "epoch": 2.0877659574468086, + "grad_norm": 0.8923791646957397, + "learning_rate": 7.010207054545873e-06, + "loss": 0.1127, + "step": 785 + }, + { + "epoch": 2.101063829787234, + "grad_norm": 1.0910581350326538, + "learning_rate": 6.8248800529606604e-06, + "loss": 0.1305, + "step": 790 + }, + { + "epoch": 2.1143617021276597, + "grad_norm": 1.025908350944519, + "learning_rate": 6.641311570762918e-06, + "loss": 0.1163, + "step": 795 + }, + { + "epoch": 2.127659574468085, + "grad_norm": 0.9809995889663696, + "learning_rate": 6.4595410946803e-06, + "loss": 0.1213, + "step": 800 + }, + { + "epoch": 2.1409574468085104, + "grad_norm": 0.9388774633407593, + "learning_rate": 6.2796077246781046e-06, + "loss": 0.1048, + "step": 805 + }, + { + "epoch": 2.154255319148936, + "grad_norm": 1.0608594417572021, + "learning_rate": 6.1015501655486365e-06, + "loss": 0.1164, + "step": 810 + }, + { + "epoch": 2.1675531914893615, + "grad_norm": 0.9531776905059814, + "learning_rate": 5.925406718585552e-06, + "loss": 0.118, + "step": 815 + }, + { + "epoch": 2.1808510638297873, + "grad_norm": 0.962715208530426, + "learning_rate": 5.751215273345036e-06, + "loss": 0.1113, + "step": 820 + }, + { + "epoch": 2.1941489361702127, + "grad_norm": 1.0311927795410156, + "learning_rate": 5.5790132994954935e-06, + "loss": 0.1285, + "step": 825 + }, + { + "epoch": 2.2074468085106385, + "grad_norm": 0.8641161918640137, + "learning_rate": 5.408837838757588e-06, + "loss": 0.1119, + "step": 830 + }, + { + "epoch": 2.220744680851064, + "grad_norm": 0.9226041436195374, + "learning_rate": 5.240725496936373e-06, + "loss": 0.1012, + "step": 835 + }, + { + "epoch": 2.2340425531914896, + "grad_norm": 0.9216217398643494, + "learning_rate": 5.0747124360471125e-06, + "loss": 0.0935, + "step": 840 + }, + { + "epoch": 2.247340425531915, + "grad_norm": 1.0029330253601074, + "learning_rate": 4.910834366536631e-06, + "loss": 0.1051, + "step": 845 + }, + { + "epoch": 2.2606382978723403, + "grad_norm": 0.9193028211593628, + "learning_rate": 4.74912653960177e-06, + "loss": 0.092, + "step": 850 + }, + { + "epoch": 2.273936170212766, + "grad_norm": 0.9708936214447021, + "learning_rate": 4.589623739606625e-06, + "loss": 0.1069, + "step": 855 + }, + { + "epoch": 2.2872340425531914, + "grad_norm": 0.9913526773452759, + "learning_rate": 4.4323602766002165e-06, + "loss": 0.1081, + "step": 860 + }, + { + "epoch": 2.300531914893617, + "grad_norm": 0.7948131561279297, + "learning_rate": 4.277369978936188e-06, + "loss": 0.1006, + "step": 865 + }, + { + "epoch": 2.3138297872340425, + "grad_norm": 1.0672922134399414, + "learning_rate": 4.1246861859961114e-06, + "loss": 0.1114, + "step": 870 + }, + { + "epoch": 2.327127659574468, + "grad_norm": 0.8179038763046265, + "learning_rate": 3.974341741017978e-06, + "loss": 0.0813, + "step": 875 + }, + { + "epoch": 2.3404255319148937, + "grad_norm": 0.6714320182800293, + "learning_rate": 3.826368984031414e-06, + "loss": 0.1057, + "step": 880 + }, + { + "epoch": 2.353723404255319, + "grad_norm": 0.8225026726722717, + "learning_rate": 3.6807997449011426e-06, + "loss": 0.1197, + "step": 885 + }, + { + "epoch": 2.367021276595745, + "grad_norm": 0.8705376982688904, + "learning_rate": 3.5376653364801703e-06, + "loss": 0.0937, + "step": 890 + }, + { + "epoch": 2.38031914893617, + "grad_norm": 0.8327345252037048, + "learning_rate": 3.3969965478742038e-06, + "loss": 0.0938, + "step": 895 + }, + { + "epoch": 2.393617021276596, + "grad_norm": 1.0116803646087646, + "learning_rate": 3.258823637818722e-06, + "loss": 0.0933, + "step": 900 + }, + { + "epoch": 2.4069148936170213, + "grad_norm": 0.9232926368713379, + "learning_rate": 3.123176328170131e-06, + "loss": 0.1128, + "step": 905 + }, + { + "epoch": 2.420212765957447, + "grad_norm": 0.8569035530090332, + "learning_rate": 2.990083797512401e-06, + "loss": 0.1046, + "step": 910 + }, + { + "epoch": 2.4335106382978724, + "grad_norm": 0.9415323734283447, + "learning_rate": 2.8595746748805805e-06, + "loss": 0.1063, + "step": 915 + }, + { + "epoch": 2.4468085106382977, + "grad_norm": 0.8304966688156128, + "learning_rate": 2.7316770336025166e-06, + "loss": 0.0895, + "step": 920 + }, + { + "epoch": 2.4601063829787235, + "grad_norm": 0.6607825756072998, + "learning_rate": 2.60641838526008e-06, + "loss": 0.0916, + "step": 925 + }, + { + "epoch": 2.473404255319149, + "grad_norm": 1.076198935508728, + "learning_rate": 2.483825673771279e-06, + "loss": 0.0968, + "step": 930 + }, + { + "epoch": 2.4867021276595747, + "grad_norm": 0.8798213601112366, + "learning_rate": 2.363925269594449e-06, + "loss": 0.0926, + "step": 935 + }, + { + "epoch": 2.5, + "grad_norm": 0.9128975868225098, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.1097, + "step": 940 + }, + { + "epoch": 2.5132978723404253, + "grad_norm": 0.7990127205848694, + "learning_rate": 2.1323039638015024e-06, + "loss": 0.0995, + "step": 945 + }, + { + "epoch": 2.526595744680851, + "grad_norm": 0.8091543912887573, + "learning_rate": 2.020632885375684e-06, + "loss": 0.0931, + "step": 950 + }, + { + "epoch": 2.5398936170212765, + "grad_norm": 1.08230721950531, + "learning_rate": 1.9117537499251416e-06, + "loss": 0.0894, + "step": 955 + }, + { + "epoch": 2.5531914893617023, + "grad_norm": 0.7366005778312683, + "learning_rate": 1.8056899780323016e-06, + "loss": 0.0876, + "step": 960 + }, + { + "epoch": 2.5664893617021276, + "grad_norm": 0.8424212336540222, + "learning_rate": 1.7024643846772981e-06, + "loss": 0.0869, + "step": 965 + }, + { + "epoch": 2.579787234042553, + "grad_norm": 0.8184525370597839, + "learning_rate": 1.6020991743303264e-06, + "loss": 0.097, + "step": 970 + }, + { + "epoch": 2.5930851063829787, + "grad_norm": 0.6666234731674194, + "learning_rate": 1.5046159361753226e-06, + "loss": 0.0863, + "step": 975 + }, + { + "epoch": 2.6063829787234045, + "grad_norm": 0.6764509081840515, + "learning_rate": 1.4100356394659863e-06, + "loss": 0.0825, + "step": 980 + }, + { + "epoch": 2.61968085106383, + "grad_norm": 0.8164499998092651, + "learning_rate": 1.318378629015184e-06, + "loss": 0.0817, + "step": 985 + }, + { + "epoch": 2.632978723404255, + "grad_norm": 0.6372337341308594, + "learning_rate": 1.229664620818633e-06, + "loss": 0.0929, + "step": 990 + }, + { + "epoch": 2.646276595744681, + "grad_norm": 0.6811972856521606, + "learning_rate": 1.1439126978138769e-06, + "loss": 0.0906, + "step": 995 + }, + { + "epoch": 2.6595744680851063, + "grad_norm": 0.6908390522003174, + "learning_rate": 1.0611413057754221e-06, + "loss": 0.0834, + "step": 1000 + }, + { + "epoch": 2.672872340425532, + "grad_norm": 0.6784895658493042, + "learning_rate": 9.813682493469396e-07, + "loss": 0.0891, + "step": 1005 + }, + { + "epoch": 2.6861702127659575, + "grad_norm": 0.6969872117042542, + "learning_rate": 9.046106882113753e-07, + "loss": 0.0835, + "step": 1010 + }, + { + "epoch": 2.699468085106383, + "grad_norm": 0.8926970958709717, + "learning_rate": 8.308851333997918e-07, + "loss": 0.0815, + "step": 1015 + }, + { + "epoch": 2.7127659574468086, + "grad_norm": 0.7531631588935852, + "learning_rate": 7.602074437397455e-07, + "loss": 0.085, + "step": 1020 + }, + { + "epoch": 2.726063829787234, + "grad_norm": 0.6779833436012268, + "learning_rate": 6.925928224439532e-07, + "loss": 0.0724, + "step": 1025 + }, + { + "epoch": 2.7393617021276597, + "grad_norm": 0.7985602617263794, + "learning_rate": 6.280558138399805e-07, + "loss": 0.0851, + "step": 1030 + }, + { + "epoch": 2.752659574468085, + "grad_norm": 0.7332150936126709, + "learning_rate": 5.666103002416762e-07, + "loss": 0.0811, + "step": 1035 + }, + { + "epoch": 2.7659574468085104, + "grad_norm": 0.7957667708396912, + "learning_rate": 5.082694989629916e-07, + "loss": 0.073, + "step": 1040 + }, + { + "epoch": 2.779255319148936, + "grad_norm": 0.7978726029396057, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.088, + "step": 1045 + }, + { + "epoch": 2.7925531914893615, + "grad_norm": 0.88449627161026, + "learning_rate": 4.0095156070571513e-07, + "loss": 0.0758, + "step": 1050 + }, + { + "epoch": 2.8058510638297873, + "grad_norm": 0.6900007724761963, + "learning_rate": 3.5199750848627753e-07, + "loss": 0.0863, + "step": 1055 + }, + { + "epoch": 2.8191489361702127, + "grad_norm": 0.688105583190918, + "learning_rate": 3.0619433313909706e-07, + "loss": 0.1025, + "step": 1060 + }, + { + "epoch": 2.8324468085106385, + "grad_norm": 0.8233679533004761, + "learning_rate": 2.635518872134185e-07, + "loss": 0.0964, + "step": 1065 + }, + { + "epoch": 2.845744680851064, + "grad_norm": 0.7394217252731323, + "learning_rate": 2.2407934336583446e-07, + "loss": 0.0835, + "step": 1070 + }, + { + "epoch": 2.8590425531914896, + "grad_norm": 0.8431270718574524, + "learning_rate": 1.8778519238719204e-07, + "loss": 0.094, + "step": 1075 + }, + { + "epoch": 2.872340425531915, + "grad_norm": 0.6894851922988892, + "learning_rate": 1.5467724137617046e-07, + "loss": 0.0958, + "step": 1080 + }, + { + "epoch": 2.8856382978723403, + "grad_norm": 0.7746151685714722, + "learning_rate": 1.2476261205992934e-07, + "loss": 0.0777, + "step": 1085 + }, + { + "epoch": 2.898936170212766, + "grad_norm": 0.7858954071998596, + "learning_rate": 9.804773926217092e-08, + "loss": 0.0853, + "step": 1090 + }, + { + "epoch": 2.9122340425531914, + "grad_norm": 0.7193872928619385, + "learning_rate": 7.453836951897885e-08, + "loss": 0.0775, + "step": 1095 + }, + { + "epoch": 2.925531914893617, + "grad_norm": 0.5978102684020996, + "learning_rate": 5.4239559842695354e-08, + "loss": 0.0803, + "step": 1100 + }, + { + "epoch": 2.9388297872340425, + "grad_norm": 0.6856487393379211, + "learning_rate": 3.715567663412966e-08, + "loss": 0.0879, + "step": 1105 + }, + { + "epoch": 2.952127659574468, + "grad_norm": 0.9020809531211853, + "learning_rate": 2.3290394743317732e-08, + "loss": 0.0894, + "step": 1110 + }, + { + "epoch": 2.9654255319148937, + "grad_norm": 0.7106974124908447, + "learning_rate": 1.2646696679042835e-08, + "loss": 0.0734, + "step": 1115 + }, + { + "epoch": 2.978723404255319, + "grad_norm": 0.6694255471229553, + "learning_rate": 5.2268719672671215e-09, + "loss": 0.0823, + "step": 1120 + }, + { + "epoch": 2.992021276595745, + "grad_norm": 0.7845897078514099, + "learning_rate": 1.0325166586572233e-09, + "loss": 0.0933, + "step": 1125 + }, + { + "epoch": 3.0, + "step": 1128, + "total_flos": 1.4339270676987249e+18, + "train_loss": 0.40115166138461295, + "train_runtime": 1136.1457, + "train_samples_per_second": 31.771, + "train_steps_per_second": 0.993 + } + ], + "logging_steps": 5, + "max_steps": 1128, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4339270676987249e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..239c3b4c622fb71060f5c109f77a4dd903d80341 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/19_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50156d6c64ad06b862401f9952779c96ca5a3a8a7c9a7ca1f40d937682216d3c +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4682aa153f7f57a7a6b610ef7439a113fa4a2c90 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 1_128_e3_3e-5 + results: [] +--- + + + +# 1_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..132b2d1c6a219e392138fd77431cdd54354e492a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "gate_proj", + "o_proj", + "up_proj", + "q_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e559280e7aca1269b09a8a81a59b42ce9f27e41 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f310ce89cb15f3ecf591bf8af6875009126419a07772205e2f38f12af5e6133a +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..228156e4019a6b16dff9f3b802314b5e77d71b2a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2938989562746634e+18, + "train_loss": 0.4432945014635149, + "train_runtime": 1032.7578, + "train_samples": 11101, + "train_samples_per_second": 32.247, + "train_steps_per_second": 1.008 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..228156e4019a6b16dff9f3b802314b5e77d71b2a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2938989562746634e+18, + "train_loss": 0.4432945014635149, + "train_runtime": 1032.7578, + "train_samples": 11101, + "train_samples_per_second": 32.247, + "train_steps_per_second": 1.008 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5b04a35e5c26ba3755047fc90f4b44d70c8feec7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1499 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1041, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01440922190201729, + "grad_norm": 0.6326138973236084, + "learning_rate": 2.2641509433962262e-06, + "loss": 1.5775, + "step": 5 + }, + { + "epoch": 0.02881844380403458, + "grad_norm": 0.5890294909477234, + "learning_rate": 5.094339622641509e-06, + "loss": 1.5447, + "step": 10 + }, + { + "epoch": 0.043227665706051875, + "grad_norm": 0.5766253471374512, + "learning_rate": 7.924528301886793e-06, + "loss": 1.6132, + "step": 15 + }, + { + "epoch": 0.05763688760806916, + "grad_norm": 0.5280054211616516, + "learning_rate": 1.0754716981132076e-05, + "loss": 1.4977, + "step": 20 + }, + { + "epoch": 0.07204610951008646, + "grad_norm": 0.5059104561805725, + "learning_rate": 1.358490566037736e-05, + "loss": 1.5393, + "step": 25 + }, + { + "epoch": 0.08645533141210375, + "grad_norm": 0.5307747721672058, + "learning_rate": 1.6415094339622643e-05, + "loss": 1.5136, + "step": 30 + }, + { + "epoch": 0.10086455331412104, + "grad_norm": 0.47110551595687866, + "learning_rate": 1.9245283018867924e-05, + "loss": 1.4598, + "step": 35 + }, + { + "epoch": 0.11527377521613832, + "grad_norm": 0.4995555579662323, + "learning_rate": 2.2075471698113208e-05, + "loss": 1.4478, + "step": 40 + }, + { + "epoch": 0.12968299711815562, + "grad_norm": 0.5440229177474976, + "learning_rate": 2.4905660377358492e-05, + "loss": 1.4401, + "step": 45 + }, + { + "epoch": 0.1440922190201729, + "grad_norm": 0.5636662244796753, + "learning_rate": 2.7735849056603773e-05, + "loss": 1.4337, + "step": 50 + }, + { + "epoch": 0.1585014409221902, + "grad_norm": 0.5433523058891296, + "learning_rate": 2.9999924169005146e-05, + "loss": 1.3849, + "step": 55 + }, + { + "epoch": 0.1729106628242075, + "grad_norm": 0.5575237274169922, + "learning_rate": 2.9997270164689188e-05, + "loss": 1.3548, + "step": 60 + }, + { + "epoch": 0.1873198847262248, + "grad_norm": 0.6288254857063293, + "learning_rate": 2.999082537730771e-05, + "loss": 1.3145, + "step": 65 + }, + { + "epoch": 0.2017291066282421, + "grad_norm": 0.6488506197929382, + "learning_rate": 2.998059143587657e-05, + "loss": 1.3585, + "step": 70 + }, + { + "epoch": 0.21613832853025935, + "grad_norm": 0.7240475416183472, + "learning_rate": 2.9966570927176653e-05, + "loss": 1.27, + "step": 75 + }, + { + "epoch": 0.23054755043227665, + "grad_norm": 0.6255141496658325, + "learning_rate": 2.994876739510005e-05, + "loss": 1.2684, + "step": 80 + }, + { + "epoch": 0.24495677233429394, + "grad_norm": 0.6786113977432251, + "learning_rate": 2.9927185339754245e-05, + "loss": 1.2098, + "step": 85 + }, + { + "epoch": 0.25936599423631124, + "grad_norm": 0.6742846965789795, + "learning_rate": 2.9901830216324694e-05, + "loss": 1.2823, + "step": 90 + }, + { + "epoch": 0.2737752161383285, + "grad_norm": 0.73334139585495, + "learning_rate": 2.9872708433695907e-05, + "loss": 1.2154, + "step": 95 + }, + { + "epoch": 0.2881844380403458, + "grad_norm": 0.7575238943099976, + "learning_rate": 2.9839827352831522e-05, + "loss": 1.1448, + "step": 100 + }, + { + "epoch": 0.3025936599423631, + "grad_norm": 0.7394768595695496, + "learning_rate": 2.980319528491373e-05, + "loss": 1.1314, + "step": 105 + }, + { + "epoch": 0.3170028818443804, + "grad_norm": 0.7666486501693726, + "learning_rate": 2.976282148924246e-05, + "loss": 1.131, + "step": 110 + }, + { + "epoch": 0.3314121037463977, + "grad_norm": 2.4404919147491455, + "learning_rate": 2.9718716170894987e-05, + "loss": 1.1018, + "step": 115 + }, + { + "epoch": 0.345821325648415, + "grad_norm": 0.7966125011444092, + "learning_rate": 2.967089047814643e-05, + "loss": 1.0528, + "step": 120 + }, + { + "epoch": 0.36023054755043227, + "grad_norm": 0.8069521188735962, + "learning_rate": 2.961935649965188e-05, + "loss": 1.1209, + "step": 125 + }, + { + "epoch": 0.3746397694524496, + "grad_norm": 0.8277975916862488, + "learning_rate": 2.956412726139078e-05, + "loss": 1.0631, + "step": 130 + }, + { + "epoch": 0.38904899135446686, + "grad_norm": 1.1069897413253784, + "learning_rate": 2.9505216723374442e-05, + "loss": 1.0522, + "step": 135 + }, + { + "epoch": 0.4034582132564842, + "grad_norm": 0.8426180481910706, + "learning_rate": 2.9442639776117436e-05, + "loss": 1.0033, + "step": 140 + }, + { + "epoch": 0.41786743515850144, + "grad_norm": 0.8600759506225586, + "learning_rate": 2.9376412236873792e-05, + "loss": 0.9838, + "step": 145 + }, + { + "epoch": 0.4322766570605187, + "grad_norm": 0.9865477085113525, + "learning_rate": 2.9306550845638953e-05, + "loss": 0.984, + "step": 150 + }, + { + "epoch": 0.44668587896253603, + "grad_norm": 0.9943352937698364, + "learning_rate": 2.9233073260918497e-05, + "loss": 0.955, + "step": 155 + }, + { + "epoch": 0.4610951008645533, + "grad_norm": 0.9495847821235657, + "learning_rate": 2.9155998055264676e-05, + "loss": 0.9497, + "step": 160 + }, + { + "epoch": 0.4755043227665706, + "grad_norm": 1.0473064184188843, + "learning_rate": 2.907534471058195e-05, + "loss": 0.9001, + "step": 165 + }, + { + "epoch": 0.4899135446685879, + "grad_norm": 0.9288561940193176, + "learning_rate": 2.8991133613202615e-05, + "loss": 0.9134, + "step": 170 + }, + { + "epoch": 0.5043227665706052, + "grad_norm": 0.9709214568138123, + "learning_rate": 2.890338604873387e-05, + "loss": 0.896, + "step": 175 + }, + { + "epoch": 0.5187319884726225, + "grad_norm": 1.2848618030548096, + "learning_rate": 2.8812124196677585e-05, + "loss": 0.8703, + "step": 180 + }, + { + "epoch": 0.5331412103746398, + "grad_norm": 1.0134344100952148, + "learning_rate": 2.871737112482405e-05, + "loss": 0.82, + "step": 185 + }, + { + "epoch": 0.547550432276657, + "grad_norm": 1.101289987564087, + "learning_rate": 2.8619150783421303e-05, + "loss": 0.8835, + "step": 190 + }, + { + "epoch": 0.5619596541786743, + "grad_norm": 1.0745699405670166, + "learning_rate": 2.851748799912131e-05, + "loss": 0.8288, + "step": 195 + }, + { + "epoch": 0.5763688760806917, + "grad_norm": 1.0891857147216797, + "learning_rate": 2.8412408468704673e-05, + "loss": 0.855, + "step": 200 + }, + { + "epoch": 0.590778097982709, + "grad_norm": 1.0328277349472046, + "learning_rate": 2.83039387525854e-05, + "loss": 0.8235, + "step": 205 + }, + { + "epoch": 0.6051873198847262, + "grad_norm": 1.153303623199463, + "learning_rate": 2.8192106268097336e-05, + "loss": 0.8253, + "step": 210 + }, + { + "epoch": 0.6195965417867435, + "grad_norm": 1.1520222425460815, + "learning_rate": 2.8076939282564054e-05, + "loss": 0.768, + "step": 215 + }, + { + "epoch": 0.6340057636887608, + "grad_norm": 1.3856819868087769, + "learning_rate": 2.795846690615385e-05, + "loss": 0.7161, + "step": 220 + }, + { + "epoch": 0.6484149855907781, + "grad_norm": 1.2162357568740845, + "learning_rate": 2.7836719084521714e-05, + "loss": 0.7387, + "step": 225 + }, + { + "epoch": 0.6628242074927954, + "grad_norm": 1.3025133609771729, + "learning_rate": 2.7711726591240133e-05, + "loss": 0.6679, + "step": 230 + }, + { + "epoch": 0.6772334293948127, + "grad_norm": 1.1838921308517456, + "learning_rate": 2.7583521020020615e-05, + "loss": 0.6589, + "step": 235 + }, + { + "epoch": 0.69164265129683, + "grad_norm": 1.24068021774292, + "learning_rate": 2.7452134776727875e-05, + "loss": 0.6627, + "step": 240 + }, + { + "epoch": 0.7060518731988472, + "grad_norm": 1.1494094133377075, + "learning_rate": 2.7317601071188823e-05, + "loss": 0.7173, + "step": 245 + }, + { + "epoch": 0.7204610951008645, + "grad_norm": 1.1995824575424194, + "learning_rate": 2.7179953908798246e-05, + "loss": 0.6877, + "step": 250 + }, + { + "epoch": 0.7348703170028819, + "grad_norm": 1.2257663011550903, + "learning_rate": 2.7039228081923448e-05, + "loss": 0.6837, + "step": 255 + }, + { + "epoch": 0.7492795389048992, + "grad_norm": 1.5328586101531982, + "learning_rate": 2.6895459161109978e-05, + "loss": 0.6802, + "step": 260 + }, + { + "epoch": 0.7636887608069164, + "grad_norm": 1.1155517101287842, + "learning_rate": 2.6748683486090616e-05, + "loss": 0.6363, + "step": 265 + }, + { + "epoch": 0.7780979827089337, + "grad_norm": 1.1815747022628784, + "learning_rate": 2.6598938156600005e-05, + "loss": 0.5767, + "step": 270 + }, + { + "epoch": 0.792507204610951, + "grad_norm": 1.1503313779830933, + "learning_rate": 2.6446261022997098e-05, + "loss": 0.6562, + "step": 275 + }, + { + "epoch": 0.8069164265129684, + "grad_norm": 1.3893643617630005, + "learning_rate": 2.629069067669795e-05, + "loss": 0.5605, + "step": 280 + }, + { + "epoch": 0.8213256484149856, + "grad_norm": 1.2385926246643066, + "learning_rate": 2.613226644042114e-05, + "loss": 0.5196, + "step": 285 + }, + { + "epoch": 0.8357348703170029, + "grad_norm": 1.2617326974868774, + "learning_rate": 2.5971028358248396e-05, + "loss": 0.5931, + "step": 290 + }, + { + "epoch": 0.8501440922190202, + "grad_norm": 1.256900429725647, + "learning_rate": 2.5807017185502833e-05, + "loss": 0.5632, + "step": 295 + }, + { + "epoch": 0.8645533141210374, + "grad_norm": 1.1927273273468018, + "learning_rate": 2.5640274378447444e-05, + "loss": 0.593, + "step": 300 + }, + { + "epoch": 0.8789625360230547, + "grad_norm": 1.314105749130249, + "learning_rate": 2.5470842083806424e-05, + "loss": 0.6078, + "step": 305 + }, + { + "epoch": 0.8933717579250721, + "grad_norm": 1.3578660488128662, + "learning_rate": 2.5298763128111956e-05, + "loss": 0.5174, + "step": 310 + }, + { + "epoch": 0.9077809798270894, + "grad_norm": 1.184133768081665, + "learning_rate": 2.5124081006879148e-05, + "loss": 0.4964, + "step": 315 + }, + { + "epoch": 0.9221902017291066, + "grad_norm": 1.2772337198257446, + "learning_rate": 2.494683987361193e-05, + "loss": 0.5529, + "step": 320 + }, + { + "epoch": 0.9365994236311239, + "grad_norm": 1.4075205326080322, + "learning_rate": 2.4767084528642564e-05, + "loss": 0.5291, + "step": 325 + }, + { + "epoch": 0.9510086455331412, + "grad_norm": 1.2829254865646362, + "learning_rate": 2.458486040780772e-05, + "loss": 0.4804, + "step": 330 + }, + { + "epoch": 0.9654178674351584, + "grad_norm": 1.2873066663742065, + "learning_rate": 2.440021357096388e-05, + "loss": 0.5288, + "step": 335 + }, + { + "epoch": 0.9798270893371758, + "grad_norm": 1.575387716293335, + "learning_rate": 2.4213190690345018e-05, + "loss": 0.5293, + "step": 340 + }, + { + "epoch": 0.9942363112391931, + "grad_norm": 1.3900673389434814, + "learning_rate": 2.4023839038765525e-05, + "loss": 0.4524, + "step": 345 + }, + { + "epoch": 1.0086455331412103, + "grad_norm": 1.2405873537063599, + "learning_rate": 2.383220647767127e-05, + "loss": 0.4189, + "step": 350 + }, + { + "epoch": 1.0230547550432276, + "grad_norm": 1.3487048149108887, + "learning_rate": 2.363834144504192e-05, + "loss": 0.4418, + "step": 355 + }, + { + "epoch": 1.037463976945245, + "grad_norm": 1.2903871536254883, + "learning_rate": 2.3442292943147543e-05, + "loss": 0.4139, + "step": 360 + }, + { + "epoch": 1.0518731988472623, + "grad_norm": 1.2593207359313965, + "learning_rate": 2.324411052616251e-05, + "loss": 0.3843, + "step": 365 + }, + { + "epoch": 1.0662824207492796, + "grad_norm": 1.2757985591888428, + "learning_rate": 2.304384428763998e-05, + "loss": 0.4293, + "step": 370 + }, + { + "epoch": 1.080691642651297, + "grad_norm": 1.4238191843032837, + "learning_rate": 2.2841544847849994e-05, + "loss": 0.3857, + "step": 375 + }, + { + "epoch": 1.0951008645533142, + "grad_norm": 1.4944229125976562, + "learning_rate": 2.2637263340984446e-05, + "loss": 0.4294, + "step": 380 + }, + { + "epoch": 1.1095100864553313, + "grad_norm": 1.2778730392456055, + "learning_rate": 2.2431051402232164e-05, + "loss": 0.3699, + "step": 385 + }, + { + "epoch": 1.1239193083573487, + "grad_norm": 1.5316014289855957, + "learning_rate": 2.2222961154727346e-05, + "loss": 0.3332, + "step": 390 + }, + { + "epoch": 1.138328530259366, + "grad_norm": 1.3182975053787231, + "learning_rate": 2.2013045196374645e-05, + "loss": 0.4021, + "step": 395 + }, + { + "epoch": 1.1527377521613833, + "grad_norm": 1.5662586688995361, + "learning_rate": 2.1801356586554298e-05, + "loss": 0.3619, + "step": 400 + }, + { + "epoch": 1.1671469740634006, + "grad_norm": 1.265500545501709, + "learning_rate": 2.1587948832710557e-05, + "loss": 0.429, + "step": 405 + }, + { + "epoch": 1.181556195965418, + "grad_norm": 1.4001071453094482, + "learning_rate": 2.1372875876826892e-05, + "loss": 0.3588, + "step": 410 + }, + { + "epoch": 1.195965417867435, + "grad_norm": 1.2341262102127075, + "learning_rate": 2.1156192081791355e-05, + "loss": 0.38, + "step": 415 + }, + { + "epoch": 1.2103746397694524, + "grad_norm": 1.5437573194503784, + "learning_rate": 2.093795221765554e-05, + "loss": 0.3487, + "step": 420 + }, + { + "epoch": 1.2247838616714697, + "grad_norm": 1.2651231288909912, + "learning_rate": 2.071821144779066e-05, + "loss": 0.3487, + "step": 425 + }, + { + "epoch": 1.239193083573487, + "grad_norm": 1.3521462678909302, + "learning_rate": 2.049702531494417e-05, + "loss": 0.3628, + "step": 430 + }, + { + "epoch": 1.2536023054755043, + "grad_norm": 1.3782352209091187, + "learning_rate": 2.0274449727200497e-05, + "loss": 0.3089, + "step": 435 + }, + { + "epoch": 1.2680115273775217, + "grad_norm": 1.307472825050354, + "learning_rate": 2.0050540943849477e-05, + "loss": 0.3692, + "step": 440 + }, + { + "epoch": 1.282420749279539, + "grad_norm": 1.279290795326233, + "learning_rate": 1.9825355561165953e-05, + "loss": 0.3627, + "step": 445 + }, + { + "epoch": 1.2968299711815563, + "grad_norm": 1.4601019620895386, + "learning_rate": 1.959895049810423e-05, + "loss": 0.3269, + "step": 450 + }, + { + "epoch": 1.3112391930835736, + "grad_norm": 1.1874650716781616, + "learning_rate": 1.937138298191098e-05, + "loss": 0.3616, + "step": 455 + }, + { + "epoch": 1.3256484149855907, + "grad_norm": 1.4081809520721436, + "learning_rate": 1.914271053366018e-05, + "loss": 0.3191, + "step": 460 + }, + { + "epoch": 1.340057636887608, + "grad_norm": 1.3197582960128784, + "learning_rate": 1.8912990953713812e-05, + "loss": 0.2611, + "step": 465 + }, + { + "epoch": 1.3544668587896254, + "grad_norm": 1.2590925693511963, + "learning_rate": 1.8682282307111988e-05, + "loss": 0.3223, + "step": 470 + }, + { + "epoch": 1.3688760806916427, + "grad_norm": 1.36367666721344, + "learning_rate": 1.8450642908896104e-05, + "loss": 0.3054, + "step": 475 + }, + { + "epoch": 1.38328530259366, + "grad_norm": 1.3232920169830322, + "learning_rate": 1.8218131309368876e-05, + "loss": 0.2973, + "step": 480 + }, + { + "epoch": 1.397694524495677, + "grad_norm": 1.2004799842834473, + "learning_rate": 1.798480627929488e-05, + "loss": 0.2735, + "step": 485 + }, + { + "epoch": 1.4121037463976944, + "grad_norm": 1.4547375440597534, + "learning_rate": 1.7750726795045345e-05, + "loss": 0.305, + "step": 490 + }, + { + "epoch": 1.4265129682997117, + "grad_norm": 1.413098692893982, + "learning_rate": 1.7515952023691022e-05, + "loss": 0.2934, + "step": 495 + }, + { + "epoch": 1.440922190201729, + "grad_norm": 1.241791844367981, + "learning_rate": 1.728054130804681e-05, + "loss": 0.2986, + "step": 500 + }, + { + "epoch": 1.4553314121037464, + "grad_norm": 1.3578342199325562, + "learning_rate": 1.7044554151672003e-05, + "loss": 0.2614, + "step": 505 + }, + { + "epoch": 1.4697406340057637, + "grad_norm": 1.504109263420105, + "learning_rate": 1.6808050203829845e-05, + "loss": 0.2658, + "step": 510 + }, + { + "epoch": 1.484149855907781, + "grad_norm": 1.3065226078033447, + "learning_rate": 1.657108924441031e-05, + "loss": 0.2634, + "step": 515 + }, + { + "epoch": 1.4985590778097984, + "grad_norm": 1.3360189199447632, + "learning_rate": 1.6333731168819854e-05, + "loss": 0.3058, + "step": 520 + }, + { + "epoch": 1.5129682997118157, + "grad_norm": 1.52360200881958, + "learning_rate": 1.6096035972841937e-05, + "loss": 0.2661, + "step": 525 + }, + { + "epoch": 1.527377521613833, + "grad_norm": 1.2809010744094849, + "learning_rate": 1.5858063737472222e-05, + "loss": 0.3099, + "step": 530 + }, + { + "epoch": 1.54178674351585, + "grad_norm": 1.4658622741699219, + "learning_rate": 1.5619874613732198e-05, + "loss": 0.2564, + "step": 535 + }, + { + "epoch": 1.5561959654178674, + "grad_norm": 1.2504994869232178, + "learning_rate": 1.5381528807465113e-05, + "loss": 0.2901, + "step": 540 + }, + { + "epoch": 1.5706051873198847, + "grad_norm": 1.2662858963012695, + "learning_rate": 1.5143086564118042e-05, + "loss": 0.2469, + "step": 545 + }, + { + "epoch": 1.585014409221902, + "grad_norm": 1.3593788146972656, + "learning_rate": 1.4904608153513986e-05, + "loss": 0.2333, + "step": 550 + }, + { + "epoch": 1.5994236311239192, + "grad_norm": 1.38326096534729, + "learning_rate": 1.466615385461774e-05, + "loss": 0.2362, + "step": 555 + }, + { + "epoch": 1.6138328530259365, + "grad_norm": 1.4459670782089233, + "learning_rate": 1.4427783940299526e-05, + "loss": 0.2314, + "step": 560 + }, + { + "epoch": 1.6282420749279538, + "grad_norm": 1.3313766717910767, + "learning_rate": 1.4189558662100094e-05, + "loss": 0.2544, + "step": 565 + }, + { + "epoch": 1.6426512968299711, + "grad_norm": 1.2980507612228394, + "learning_rate": 1.3951538235001262e-05, + "loss": 0.2257, + "step": 570 + }, + { + "epoch": 1.6570605187319885, + "grad_norm": 1.170307993888855, + "learning_rate": 1.3713782822205703e-05, + "loss": 0.2601, + "step": 575 + }, + { + "epoch": 1.6714697406340058, + "grad_norm": 1.3811801671981812, + "learning_rate": 1.3476352519929766e-05, + "loss": 0.2097, + "step": 580 + }, + { + "epoch": 1.685878962536023, + "grad_norm": 1.4127800464630127, + "learning_rate": 1.3239307342213282e-05, + "loss": 0.2149, + "step": 585 + }, + { + "epoch": 1.7002881844380404, + "grad_norm": 1.8162463903427124, + "learning_rate": 1.3002707205750142e-05, + "loss": 0.2502, + "step": 590 + }, + { + "epoch": 1.7146974063400577, + "grad_norm": 1.2340574264526367, + "learning_rate": 1.2766611914743415e-05, + "loss": 0.2176, + "step": 595 + }, + { + "epoch": 1.729106628242075, + "grad_norm": 1.4858880043029785, + "learning_rate": 1.2531081145788989e-05, + "loss": 0.2062, + "step": 600 + }, + { + "epoch": 1.7435158501440924, + "grad_norm": 1.3304120302200317, + "learning_rate": 1.2296174432791415e-05, + "loss": 0.2115, + "step": 605 + }, + { + "epoch": 1.7579250720461095, + "grad_norm": 1.2454310655593872, + "learning_rate": 1.20619511519158e-05, + "loss": 0.1831, + "step": 610 + }, + { + "epoch": 1.7723342939481268, + "grad_norm": 1.2872998714447021, + "learning_rate": 1.1828470506579631e-05, + "loss": 0.192, + "step": 615 + }, + { + "epoch": 1.7867435158501441, + "grad_norm": 1.2484214305877686, + "learning_rate": 1.1595791512488213e-05, + "loss": 0.1877, + "step": 620 + }, + { + "epoch": 1.8011527377521612, + "grad_norm": 1.1907098293304443, + "learning_rate": 1.1363972982717588e-05, + "loss": 0.2053, + "step": 625 + }, + { + "epoch": 1.8155619596541785, + "grad_norm": 1.3836581707000732, + "learning_rate": 1.1133073512848635e-05, + "loss": 0.1838, + "step": 630 + }, + { + "epoch": 1.8299711815561959, + "grad_norm": 1.40169358253479, + "learning_rate": 1.090315146615617e-05, + "loss": 0.2056, + "step": 635 + }, + { + "epoch": 1.8443804034582132, + "grad_norm": 1.3856604099273682, + "learning_rate": 1.0674264958856779e-05, + "loss": 0.1898, + "step": 640 + }, + { + "epoch": 1.8587896253602305, + "grad_norm": 1.3703304529190063, + "learning_rate": 1.0446471845419063e-05, + "loss": 0.226, + "step": 645 + }, + { + "epoch": 1.8731988472622478, + "grad_norm": 1.1608930826187134, + "learning_rate": 1.0219829703940047e-05, + "loss": 0.1993, + "step": 650 + }, + { + "epoch": 1.8876080691642652, + "grad_norm": 1.1893846988677979, + "learning_rate": 9.994395821591501e-06, + "loss": 0.1834, + "step": 655 + }, + { + "epoch": 1.9020172910662825, + "grad_norm": 1.1628425121307373, + "learning_rate": 9.770227180139727e-06, + "loss": 0.1392, + "step": 660 + }, + { + "epoch": 1.9164265129682998, + "grad_norm": 1.33634352684021, + "learning_rate": 9.54738044154255e-06, + "loss": 0.1815, + "step": 665 + }, + { + "epoch": 1.9308357348703171, + "grad_norm": 1.207247018814087, + "learning_rate": 9.325911933627228e-06, + "loss": 0.1738, + "step": 670 + }, + { + "epoch": 1.9452449567723344, + "grad_norm": 1.5310871601104736, + "learning_rate": 9.10587763585269e-06, + "loss": 0.1704, + "step": 675 + }, + { + "epoch": 1.9596541786743515, + "grad_norm": 1.1526015996932983, + "learning_rate": 8.887333165159921e-06, + "loss": 0.1639, + "step": 680 + }, + { + "epoch": 1.9740634005763689, + "grad_norm": 1.2831737995147705, + "learning_rate": 8.67033376191398e-06, + "loss": 0.1723, + "step": 685 + }, + { + "epoch": 1.9884726224783862, + "grad_norm": 1.1766626834869385, + "learning_rate": 8.454934275941129e-06, + "loss": 0.1734, + "step": 690 + }, + { + "epoch": 2.0028818443804033, + "grad_norm": 0.990723729133606, + "learning_rate": 8.241189152664756e-06, + "loss": 0.1457, + "step": 695 + }, + { + "epoch": 2.0172910662824206, + "grad_norm": 1.1404833793640137, + "learning_rate": 8.029152419343472e-06, + "loss": 0.1437, + "step": 700 + }, + { + "epoch": 2.031700288184438, + "grad_norm": 1.0434459447860718, + "learning_rate": 7.81887767141492e-06, + "loss": 0.1348, + "step": 705 + }, + { + "epoch": 2.0461095100864553, + "grad_norm": 0.8517737984657288, + "learning_rate": 7.6104180589487354e-06, + "loss": 0.1186, + "step": 710 + }, + { + "epoch": 2.0605187319884726, + "grad_norm": 1.047645092010498, + "learning_rate": 7.403826273212066e-06, + "loss": 0.1386, + "step": 715 + }, + { + "epoch": 2.07492795389049, + "grad_norm": 1.2038551568984985, + "learning_rate": 7.199154533351086e-06, + "loss": 0.114, + "step": 720 + }, + { + "epoch": 2.089337175792507, + "grad_norm": 1.017468810081482, + "learning_rate": 6.996454573191799e-06, + "loss": 0.1301, + "step": 725 + }, + { + "epoch": 2.1037463976945245, + "grad_norm": 1.3284494876861572, + "learning_rate": 6.795777628163599e-06, + "loss": 0.1107, + "step": 730 + }, + { + "epoch": 2.118155619596542, + "grad_norm": 1.1127188205718994, + "learning_rate": 6.59717442234869e-06, + "loss": 0.1432, + "step": 735 + }, + { + "epoch": 2.132564841498559, + "grad_norm": 1.1952979564666748, + "learning_rate": 6.400695155660866e-06, + "loss": 0.1258, + "step": 740 + }, + { + "epoch": 2.1469740634005765, + "grad_norm": 1.3854767084121704, + "learning_rate": 6.2063894911567185e-06, + "loss": 0.1183, + "step": 745 + }, + { + "epoch": 2.161383285302594, + "grad_norm": 1.134765863418579, + "learning_rate": 6.0143065424825585e-06, + "loss": 0.1224, + "step": 750 + }, + { + "epoch": 2.175792507204611, + "grad_norm": 0.9109749794006348, + "learning_rate": 5.824494861460226e-06, + "loss": 0.1295, + "step": 755 + }, + { + "epoch": 2.1902017291066285, + "grad_norm": 1.2073034048080444, + "learning_rate": 5.6370024258148595e-06, + "loss": 0.1241, + "step": 760 + }, + { + "epoch": 2.2046109510086453, + "grad_norm": 0.9913353323936462, + "learning_rate": 5.451876627047873e-06, + "loss": 0.1127, + "step": 765 + }, + { + "epoch": 2.2190201729106627, + "grad_norm": 1.1208264827728271, + "learning_rate": 5.269164258457997e-06, + "loss": 0.1321, + "step": 770 + }, + { + "epoch": 2.23342939481268, + "grad_norm": 0.9111705422401428, + "learning_rate": 5.088911503313577e-06, + "loss": 0.1175, + "step": 775 + }, + { + "epoch": 2.2478386167146973, + "grad_norm": 1.0533556938171387, + "learning_rate": 4.91116392317912e-06, + "loss": 0.1245, + "step": 780 + }, + { + "epoch": 2.2622478386167146, + "grad_norm": 1.0622891187667847, + "learning_rate": 4.735966446398854e-06, + "loss": 0.1122, + "step": 785 + }, + { + "epoch": 2.276657060518732, + "grad_norm": 1.4199696779251099, + "learning_rate": 4.563363356740486e-06, + "loss": 0.1113, + "step": 790 + }, + { + "epoch": 2.2910662824207493, + "grad_norm": 1.1474500894546509, + "learning_rate": 4.393398282201788e-06, + "loss": 0.1241, + "step": 795 + }, + { + "epoch": 2.3054755043227666, + "grad_norm": 1.409523367881775, + "learning_rate": 4.22611418398298e-06, + "loss": 0.1336, + "step": 800 + }, + { + "epoch": 2.319884726224784, + "grad_norm": 1.0886765718460083, + "learning_rate": 4.0615533456276445e-06, + "loss": 0.1162, + "step": 805 + }, + { + "epoch": 2.3342939481268012, + "grad_norm": 1.0321005582809448, + "learning_rate": 3.8997573623349385e-06, + "loss": 0.1193, + "step": 810 + }, + { + "epoch": 2.3487031700288186, + "grad_norm": 0.9802637696266174, + "learning_rate": 3.7407671304457865e-06, + "loss": 0.1304, + "step": 815 + }, + { + "epoch": 2.363112391930836, + "grad_norm": 1.0601450204849243, + "learning_rate": 3.584622837105702e-06, + "loss": 0.112, + "step": 820 + }, + { + "epoch": 2.377521613832853, + "grad_norm": 0.9524638056755066, + "learning_rate": 3.4313639501069423e-06, + "loss": 0.0855, + "step": 825 + }, + { + "epoch": 2.39193083573487, + "grad_norm": 1.0974494218826294, + "learning_rate": 3.281029207912364e-06, + "loss": 0.1006, + "step": 830 + }, + { + "epoch": 2.4063400576368874, + "grad_norm": 1.1217509508132935, + "learning_rate": 3.1336566098637553e-06, + "loss": 0.1109, + "step": 835 + }, + { + "epoch": 2.4207492795389047, + "grad_norm": 0.9987860321998596, + "learning_rate": 2.989283406576932e-06, + "loss": 0.1075, + "step": 840 + }, + { + "epoch": 2.435158501440922, + "grad_norm": 1.0048942565917969, + "learning_rate": 2.847946090526056e-06, + "loss": 0.1065, + "step": 845 + }, + { + "epoch": 2.4495677233429394, + "grad_norm": 0.9544060826301575, + "learning_rate": 2.7096803868196546e-06, + "loss": 0.1165, + "step": 850 + }, + { + "epoch": 2.4639769452449567, + "grad_norm": 1.072928547859192, + "learning_rate": 2.574521244170554e-06, + "loss": 0.1003, + "step": 855 + }, + { + "epoch": 2.478386167146974, + "grad_norm": 0.8612509369850159, + "learning_rate": 2.442502826062072e-06, + "loss": 0.096, + "step": 860 + }, + { + "epoch": 2.4927953890489913, + "grad_norm": 0.7535232305526733, + "learning_rate": 2.3136585021126965e-06, + "loss": 0.1032, + "step": 865 + }, + { + "epoch": 2.5072046109510087, + "grad_norm": 0.875095784664154, + "learning_rate": 2.1880208396413996e-06, + "loss": 0.0928, + "step": 870 + }, + { + "epoch": 2.521613832853026, + "grad_norm": 0.7649706602096558, + "learning_rate": 2.0656215954358025e-06, + "loss": 0.0954, + "step": 875 + }, + { + "epoch": 2.5360230547550433, + "grad_norm": 0.9066319465637207, + "learning_rate": 1.946491707725122e-06, + "loss": 0.1065, + "step": 880 + }, + { + "epoch": 2.5504322766570606, + "grad_norm": 0.9266146421432495, + "learning_rate": 1.8306612883601193e-06, + "loss": 0.1136, + "step": 885 + }, + { + "epoch": 2.564841498559078, + "grad_norm": 0.9161733984947205, + "learning_rate": 1.718159615201853e-06, + "loss": 0.1077, + "step": 890 + }, + { + "epoch": 2.5792507204610953, + "grad_norm": 1.0627588033676147, + "learning_rate": 1.6090151247212814e-06, + "loss": 0.1165, + "step": 895 + }, + { + "epoch": 2.5936599423631126, + "grad_norm": 0.9494521617889404, + "learning_rate": 1.503255404811511e-06, + "loss": 0.105, + "step": 900 + }, + { + "epoch": 2.60806916426513, + "grad_norm": 0.812920093536377, + "learning_rate": 1.4009071878145502e-06, + "loss": 0.1049, + "step": 905 + }, + { + "epoch": 2.6224783861671472, + "grad_norm": 0.7754286527633667, + "learning_rate": 1.301996343764319e-06, + "loss": 0.1084, + "step": 910 + }, + { + "epoch": 2.636887608069164, + "grad_norm": 0.8664975166320801, + "learning_rate": 1.2065478738475883e-06, + "loss": 0.086, + "step": 915 + }, + { + "epoch": 2.6512968299711814, + "grad_norm": 0.8037185072898865, + "learning_rate": 1.1145859040846012e-06, + "loss": 0.0926, + "step": 920 + }, + { + "epoch": 2.6657060518731988, + "grad_norm": 0.7811781764030457, + "learning_rate": 1.0261336792308168e-06, + "loss": 0.0795, + "step": 925 + }, + { + "epoch": 2.680115273775216, + "grad_norm": 0.8608052134513855, + "learning_rate": 9.412135569014807e-07, + "loss": 0.0912, + "step": 930 + }, + { + "epoch": 2.6945244956772334, + "grad_norm": 0.9751714468002319, + "learning_rate": 8.598470019204047e-07, + "loss": 0.1114, + "step": 935 + }, + { + "epoch": 2.7089337175792507, + "grad_norm": 0.8285171985626221, + "learning_rate": 7.820545808943947e-07, + "loss": 0.0975, + "step": 940 + }, + { + "epoch": 2.723342939481268, + "grad_norm": 0.8056805729866028, + "learning_rate": 7.078559570147542e-07, + "loss": 0.0845, + "step": 945 + }, + { + "epoch": 2.7377521613832854, + "grad_norm": 0.7468534111976624, + "learning_rate": 6.372698850871101e-07, + "loss": 0.1042, + "step": 950 + }, + { + "epoch": 2.7521613832853027, + "grad_norm": 0.6082252264022827, + "learning_rate": 5.703142067908613e-07, + "loss": 0.092, + "step": 955 + }, + { + "epoch": 2.76657060518732, + "grad_norm": 0.7112461924552917, + "learning_rate": 5.070058461694261e-07, + "loss": 0.0796, + "step": 960 + }, + { + "epoch": 2.7809798270893373, + "grad_norm": 0.6715186238288879, + "learning_rate": 4.4736080535244084e-07, + "loss": 0.0769, + "step": 965 + }, + { + "epoch": 2.795389048991354, + "grad_norm": 0.6804276704788208, + "learning_rate": 3.9139416051098053e-07, + "loss": 0.0737, + "step": 970 + }, + { + "epoch": 2.8097982708933715, + "grad_norm": 0.805993378162384, + "learning_rate": 3.391200580468318e-07, + "loss": 0.1009, + "step": 975 + }, + { + "epoch": 2.824207492795389, + "grad_norm": 0.7249871492385864, + "learning_rate": 2.905517110167899e-07, + "loss": 0.0896, + "step": 980 + }, + { + "epoch": 2.838616714697406, + "grad_norm": 0.848204493522644, + "learning_rate": 2.457013957928472e-07, + "loss": 0.0957, + "step": 985 + }, + { + "epoch": 2.8530259365994235, + "grad_norm": 0.811032772064209, + "learning_rate": 2.0458044895916516e-07, + "loss": 0.0813, + "step": 990 + }, + { + "epoch": 2.867435158501441, + "grad_norm": 0.7549427151679993, + "learning_rate": 1.6719926444658472e-07, + "loss": 0.1104, + "step": 995 + }, + { + "epoch": 2.881844380403458, + "grad_norm": 0.8233932256698608, + "learning_rate": 1.335672909054081e-07, + "loss": 0.0984, + "step": 1000 + }, + { + "epoch": 2.8962536023054755, + "grad_norm": 0.9352244138717651, + "learning_rate": 1.0369302931710067e-07, + "loss": 0.1044, + "step": 1005 + }, + { + "epoch": 2.910662824207493, + "grad_norm": 0.6270148754119873, + "learning_rate": 7.758403084555499e-08, + "loss": 0.0916, + "step": 1010 + }, + { + "epoch": 2.92507204610951, + "grad_norm": 0.7974348664283752, + "learning_rate": 5.5246894928412396e-08, + "loss": 0.0926, + "step": 1015 + }, + { + "epoch": 2.9394812680115274, + "grad_norm": 0.9200726747512817, + "learning_rate": 3.668726760896246e-08, + "loss": 0.1024, + "step": 1020 + }, + { + "epoch": 2.9538904899135447, + "grad_norm": 0.7999383807182312, + "learning_rate": 2.190984010901953e-08, + "loss": 0.0887, + "step": 1025 + }, + { + "epoch": 2.968299711815562, + "grad_norm": 0.8747702836990356, + "learning_rate": 1.0918347643146254e-08, + "loss": 0.0989, + "step": 1030 + }, + { + "epoch": 2.9827089337175794, + "grad_norm": 0.9507675766944885, + "learning_rate": 3.715568474522146e-09, + "loss": 0.0995, + "step": 1035 + }, + { + "epoch": 2.9971181556195967, + "grad_norm": 0.7159132957458496, + "learning_rate": 3.0332321269865847e-10, + "loss": 0.0949, + "step": 1040 + }, + { + "epoch": 3.0, + "step": 1041, + "total_flos": 1.2938989562746634e+18, + "train_loss": 0.4432945014635149, + "train_runtime": 1032.7578, + "train_samples_per_second": 32.247, + "train_steps_per_second": 1.008 + } + ], + "logging_steps": 5, + "max_steps": 1041, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2938989562746634e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..32e3260e0f72662d7c5f4c8d1303aed264934e51 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/1_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e118e25c6426cd53e57c363e44491550c13f8211c0ac1bf42611bd2ad99d5eca +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fe0758123c3e35ea80f7bd3408c64d1d3725b532 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 2_128_e3_3e-5 + results: [] +--- + + + +# 2_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..920b663faff4ed510c1a85835ada4d18620be8d5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "gate_proj", + "down_proj", + "up_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94f8f808c714bc64e0f8a1d0c5c18416440befaf --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad62ec0e8851a4e28c8bf1ab4fd9e9e71d9821054f598c7768df3c8b027c799 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8408ce79afbe482b383781929d1e803d514a182f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2070794603192648e+18, + "train_loss": 0.45383420935537244, + "train_runtime": 973.654, + "train_samples": 10085, + "train_samples_per_second": 31.074, + "train_steps_per_second": 0.974 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8408ce79afbe482b383781929d1e803d514a182f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2070794603192648e+18, + "train_loss": 0.45383420935537244, + "train_runtime": 973.654, + "train_samples": 10085, + "train_samples_per_second": 31.074, + "train_steps_per_second": 0.974 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6803406c3e27c70c03f3462bd0282146454fc719 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1366 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 948, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01586042823156225, + "grad_norm": 0.6710348129272461, + "learning_rate": 2.4999999999999998e-06, + "loss": 1.5195, + "step": 5 + }, + { + "epoch": 0.0317208564631245, + "grad_norm": 0.6248921155929565, + "learning_rate": 5.625e-06, + "loss": 1.5307, + "step": 10 + }, + { + "epoch": 0.047581284694686754, + "grad_norm": 0.7797293066978455, + "learning_rate": 8.750000000000001e-06, + "loss": 1.538, + "step": 15 + }, + { + "epoch": 0.063441712926249, + "grad_norm": 0.5133812427520752, + "learning_rate": 1.1874999999999999e-05, + "loss": 1.5762, + "step": 20 + }, + { + "epoch": 0.07930214115781126, + "grad_norm": 0.5189114212989807, + "learning_rate": 1.5e-05, + "loss": 1.4581, + "step": 25 + }, + { + "epoch": 0.09516256938937351, + "grad_norm": 0.5606507062911987, + "learning_rate": 1.8125e-05, + "loss": 1.5134, + "step": 30 + }, + { + "epoch": 0.11102299762093576, + "grad_norm": 0.4825236201286316, + "learning_rate": 2.125e-05, + "loss": 1.4466, + "step": 35 + }, + { + "epoch": 0.126883425852498, + "grad_norm": 0.459823876619339, + "learning_rate": 2.4375e-05, + "loss": 1.4045, + "step": 40 + }, + { + "epoch": 0.14274385408406026, + "grad_norm": 0.5159185528755188, + "learning_rate": 2.75e-05, + "loss": 1.3839, + "step": 45 + }, + { + "epoch": 0.1586042823156225, + "grad_norm": 0.5083280801773071, + "learning_rate": 2.9999908614866857e-05, + "loss": 1.3823, + "step": 50 + }, + { + "epoch": 0.17446471054718476, + "grad_norm": 0.522777795791626, + "learning_rate": 2.9996710252122685e-05, + "loss": 1.3722, + "step": 55 + }, + { + "epoch": 0.19032513877874702, + "grad_norm": 0.618506908416748, + "learning_rate": 2.9988943746159317e-05, + "loss": 1.3409, + "step": 60 + }, + { + "epoch": 0.20618556701030927, + "grad_norm": 0.5709981322288513, + "learning_rate": 2.9976611462729715e-05, + "loss": 1.3079, + "step": 65 + }, + { + "epoch": 0.22204599524187152, + "grad_norm": 0.6473709344863892, + "learning_rate": 2.995971715836687e-05, + "loss": 1.3187, + "step": 70 + }, + { + "epoch": 0.23790642347343377, + "grad_norm": 0.6394116282463074, + "learning_rate": 2.9938265979239552e-05, + "loss": 1.2475, + "step": 75 + }, + { + "epoch": 0.253766851704996, + "grad_norm": 0.5870379209518433, + "learning_rate": 2.9912264459584732e-05, + "loss": 1.2221, + "step": 80 + }, + { + "epoch": 0.2696272799365583, + "grad_norm": 0.6369397044181824, + "learning_rate": 2.988172051971717e-05, + "loss": 1.2441, + "step": 85 + }, + { + "epoch": 0.2854877081681205, + "grad_norm": 0.8708164691925049, + "learning_rate": 2.9846643463616834e-05, + "loss": 1.1795, + "step": 90 + }, + { + "epoch": 0.3013481363996828, + "grad_norm": 0.6420678496360779, + "learning_rate": 2.980704397609482e-05, + "loss": 1.1926, + "step": 95 + }, + { + "epoch": 0.317208564631245, + "grad_norm": 0.7389319539070129, + "learning_rate": 2.9762934119538628e-05, + "loss": 1.124, + "step": 100 + }, + { + "epoch": 0.3330689928628073, + "grad_norm": 0.7167187333106995, + "learning_rate": 2.9714327330237873e-05, + "loss": 1.1234, + "step": 105 + }, + { + "epoch": 0.34892942109436953, + "grad_norm": 0.7113566398620605, + "learning_rate": 2.966123841429144e-05, + "loss": 1.1134, + "step": 110 + }, + { + "epoch": 0.3647898493259318, + "grad_norm": 0.7677503228187561, + "learning_rate": 2.9603683543097406e-05, + "loss": 1.0851, + "step": 115 + }, + { + "epoch": 0.38065027755749403, + "grad_norm": 0.767101526260376, + "learning_rate": 2.9541680248427087e-05, + "loss": 1.0118, + "step": 120 + }, + { + "epoch": 0.3965107057890563, + "grad_norm": 0.8606610298156738, + "learning_rate": 2.9475247417084672e-05, + "loss": 1.0094, + "step": 125 + }, + { + "epoch": 0.41237113402061853, + "grad_norm": 0.8410919308662415, + "learning_rate": 2.9404405285154146e-05, + "loss": 1.0325, + "step": 130 + }, + { + "epoch": 0.4282315622521808, + "grad_norm": 0.87024986743927, + "learning_rate": 2.9329175431835152e-05, + "loss": 1.0242, + "step": 135 + }, + { + "epoch": 0.44409199048374304, + "grad_norm": 0.9597465991973877, + "learning_rate": 2.9249580772869786e-05, + "loss": 1.0142, + "step": 140 + }, + { + "epoch": 0.4599524187153053, + "grad_norm": 0.9913133978843689, + "learning_rate": 2.9165645553562215e-05, + "loss": 0.8493, + "step": 145 + }, + { + "epoch": 0.47581284694686754, + "grad_norm": 1.0264703035354614, + "learning_rate": 2.907739534139334e-05, + "loss": 0.9527, + "step": 150 + }, + { + "epoch": 0.4916732751784298, + "grad_norm": 0.9425225257873535, + "learning_rate": 2.8984857018232682e-05, + "loss": 0.8932, + "step": 155 + }, + { + "epoch": 0.507533703409992, + "grad_norm": 1.331936240196228, + "learning_rate": 2.8888058772149923e-05, + "loss": 0.8814, + "step": 160 + }, + { + "epoch": 0.5233941316415543, + "grad_norm": 1.1443214416503906, + "learning_rate": 2.8787030088828517e-05, + "loss": 0.8242, + "step": 165 + }, + { + "epoch": 0.5392545598731165, + "grad_norm": 1.0150359869003296, + "learning_rate": 2.8681801742584095e-05, + "loss": 0.8611, + "step": 170 + }, + { + "epoch": 0.5551149881046789, + "grad_norm": 1.0120210647583008, + "learning_rate": 2.8572405786990293e-05, + "loss": 0.8581, + "step": 175 + }, + { + "epoch": 0.570975416336241, + "grad_norm": 1.094923496246338, + "learning_rate": 2.8458875545114926e-05, + "loss": 0.7816, + "step": 180 + }, + { + "epoch": 0.5868358445678034, + "grad_norm": 1.2213410139083862, + "learning_rate": 2.8341245599369464e-05, + "loss": 0.8727, + "step": 185 + }, + { + "epoch": 0.6026962727993656, + "grad_norm": 0.997976541519165, + "learning_rate": 2.821955178097488e-05, + "loss": 0.7841, + "step": 190 + }, + { + "epoch": 0.6185567010309279, + "grad_norm": 0.922734797000885, + "learning_rate": 2.8093831159047146e-05, + "loss": 0.8048, + "step": 195 + }, + { + "epoch": 0.63441712926249, + "grad_norm": 1.0579707622528076, + "learning_rate": 2.7964122029305574e-05, + "loss": 0.8368, + "step": 200 + }, + { + "epoch": 0.6502775574940524, + "grad_norm": 1.19312584400177, + "learning_rate": 2.78304639024076e-05, + "loss": 0.7884, + "step": 205 + }, + { + "epoch": 0.6661379857256146, + "grad_norm": 1.05195152759552, + "learning_rate": 2.769289749191346e-05, + "loss": 0.756, + "step": 210 + }, + { + "epoch": 0.6819984139571769, + "grad_norm": 1.0421589612960815, + "learning_rate": 2.7551464701884426e-05, + "loss": 0.7171, + "step": 215 + }, + { + "epoch": 0.6978588421887391, + "grad_norm": 1.2104095220565796, + "learning_rate": 2.7406208614118427e-05, + "loss": 0.7261, + "step": 220 + }, + { + "epoch": 0.7137192704203014, + "grad_norm": 1.3976048231124878, + "learning_rate": 2.7257173475026926e-05, + "loss": 0.7175, + "step": 225 + }, + { + "epoch": 0.7295796986518636, + "grad_norm": 1.133556842803955, + "learning_rate": 2.710440468215703e-05, + "loss": 0.6931, + "step": 230 + }, + { + "epoch": 0.7454401268834259, + "grad_norm": 1.1223598718643188, + "learning_rate": 2.6947948770362945e-05, + "loss": 0.6373, + "step": 235 + }, + { + "epoch": 0.7613005551149881, + "grad_norm": 1.1269049644470215, + "learning_rate": 2.678785339763103e-05, + "loss": 0.669, + "step": 240 + }, + { + "epoch": 0.7771609833465504, + "grad_norm": 1.362699270248413, + "learning_rate": 2.6624167330562697e-05, + "loss": 0.6623, + "step": 245 + }, + { + "epoch": 0.7930214115781126, + "grad_norm": 1.1603854894638062, + "learning_rate": 2.6456940429519637e-05, + "loss": 0.6327, + "step": 250 + }, + { + "epoch": 0.8088818398096749, + "grad_norm": 1.4373319149017334, + "learning_rate": 2.6286223633435865e-05, + "loss": 0.5698, + "step": 255 + }, + { + "epoch": 0.8247422680412371, + "grad_norm": 1.1621992588043213, + "learning_rate": 2.611206894430125e-05, + "loss": 0.6367, + "step": 260 + }, + { + "epoch": 0.8406026962727994, + "grad_norm": 1.254452109336853, + "learning_rate": 2.5934529411321174e-05, + "loss": 0.6026, + "step": 265 + }, + { + "epoch": 0.8564631245043616, + "grad_norm": 1.2391091585159302, + "learning_rate": 2.575365911475724e-05, + "loss": 0.5666, + "step": 270 + }, + { + "epoch": 0.8723235527359239, + "grad_norm": 1.3140405416488647, + "learning_rate": 2.556951314945392e-05, + "loss": 0.606, + "step": 275 + }, + { + "epoch": 0.8881839809674861, + "grad_norm": 1.1733580827713013, + "learning_rate": 2.5382147608056104e-05, + "loss": 0.6001, + "step": 280 + }, + { + "epoch": 0.9040444091990484, + "grad_norm": 1.2169251441955566, + "learning_rate": 2.519161956392275e-05, + "loss": 0.6096, + "step": 285 + }, + { + "epoch": 0.9199048374306106, + "grad_norm": 1.2592731714248657, + "learning_rate": 2.4997987053741785e-05, + "loss": 0.5905, + "step": 290 + }, + { + "epoch": 0.9357652656621729, + "grad_norm": 1.2125664949417114, + "learning_rate": 2.4801309059851586e-05, + "loss": 0.6137, + "step": 295 + }, + { + "epoch": 0.9516256938937351, + "grad_norm": 1.2289000749588013, + "learning_rate": 2.4601645492274334e-05, + "loss": 0.5257, + "step": 300 + }, + { + "epoch": 0.9674861221252974, + "grad_norm": 1.3832378387451172, + "learning_rate": 2.439905717046691e-05, + "loss": 0.542, + "step": 305 + }, + { + "epoch": 0.9833465503568596, + "grad_norm": 1.2151378393173218, + "learning_rate": 2.419360580479465e-05, + "loss": 0.5228, + "step": 310 + }, + { + "epoch": 0.9992069785884219, + "grad_norm": 1.228219985961914, + "learning_rate": 2.3985353977733787e-05, + "loss": 0.5255, + "step": 315 + }, + { + "epoch": 1.0126883425852498, + "grad_norm": 1.374066948890686, + "learning_rate": 2.3774365124808236e-05, + "loss": 0.3935, + "step": 320 + }, + { + "epoch": 1.028548770816812, + "grad_norm": 1.2443376779556274, + "learning_rate": 2.356070351526648e-05, + "loss": 0.4715, + "step": 325 + }, + { + "epoch": 1.0444091990483744, + "grad_norm": 1.4360579252243042, + "learning_rate": 2.3344434232504573e-05, + "loss": 0.3987, + "step": 330 + }, + { + "epoch": 1.0602696272799366, + "grad_norm": 1.3215537071228027, + "learning_rate": 2.31256231542411e-05, + "loss": 0.4848, + "step": 335 + }, + { + "epoch": 1.0761300555114988, + "grad_norm": 1.2371535301208496, + "learning_rate": 2.2904336932450167e-05, + "loss": 0.3865, + "step": 340 + }, + { + "epoch": 1.0919904837430612, + "grad_norm": 1.2728970050811768, + "learning_rate": 2.2680642973058574e-05, + "loss": 0.3767, + "step": 345 + }, + { + "epoch": 1.1078509119746234, + "grad_norm": 1.2773139476776123, + "learning_rate": 2.245460941541333e-05, + "loss": 0.4156, + "step": 350 + }, + { + "epoch": 1.1237113402061856, + "grad_norm": 1.530358910560608, + "learning_rate": 2.222630511152573e-05, + "loss": 0.4542, + "step": 355 + }, + { + "epoch": 1.1395717684377478, + "grad_norm": 1.4416447877883911, + "learning_rate": 2.199579960509837e-05, + "loss": 0.3976, + "step": 360 + }, + { + "epoch": 1.15543219666931, + "grad_norm": 1.1461186408996582, + "learning_rate": 2.176316311034146e-05, + "loss": 0.374, + "step": 365 + }, + { + "epoch": 1.1712926249008724, + "grad_norm": 1.4214637279510498, + "learning_rate": 2.1528466490584914e-05, + "loss": 0.3796, + "step": 370 + }, + { + "epoch": 1.1871530531324346, + "grad_norm": 1.644792914390564, + "learning_rate": 2.1291781236692657e-05, + "loss": 0.392, + "step": 375 + }, + { + "epoch": 1.2030134813639968, + "grad_norm": 1.297376275062561, + "learning_rate": 2.1053179445285853e-05, + "loss": 0.3977, + "step": 380 + }, + { + "epoch": 1.2188739095955592, + "grad_norm": 1.3786792755126953, + "learning_rate": 2.0812733796781544e-05, + "loss": 0.3934, + "step": 385 + }, + { + "epoch": 1.2347343378271214, + "grad_norm": 1.4446027278900146, + "learning_rate": 2.0570517533253524e-05, + "loss": 0.3627, + "step": 390 + }, + { + "epoch": 1.2505947660586836, + "grad_norm": 1.3597840070724487, + "learning_rate": 2.0326604436122056e-05, + "loss": 0.3643, + "step": 395 + }, + { + "epoch": 1.2664551942902458, + "grad_norm": 1.3699487447738647, + "learning_rate": 2.0081068803679374e-05, + "loss": 0.3654, + "step": 400 + }, + { + "epoch": 1.282315622521808, + "grad_norm": 1.219528079032898, + "learning_rate": 1.983398542845767e-05, + "loss": 0.3624, + "step": 405 + }, + { + "epoch": 1.2981760507533704, + "grad_norm": 1.4922993183135986, + "learning_rate": 1.9585429574446588e-05, + "loss": 0.3563, + "step": 410 + }, + { + "epoch": 1.3140364789849326, + "grad_norm": 1.2577568292617798, + "learning_rate": 1.9335476954167073e-05, + "loss": 0.3711, + "step": 415 + }, + { + "epoch": 1.3298969072164948, + "grad_norm": 1.356351375579834, + "learning_rate": 1.9084203705608614e-05, + "loss": 0.3749, + "step": 420 + }, + { + "epoch": 1.3457573354480572, + "grad_norm": 1.4249366521835327, + "learning_rate": 1.883168636903686e-05, + "loss": 0.3292, + "step": 425 + }, + { + "epoch": 1.3616177636796194, + "grad_norm": 1.4153070449829102, + "learning_rate": 1.8578001863678713e-05, + "loss": 0.3443, + "step": 430 + }, + { + "epoch": 1.3774781919111816, + "grad_norm": 1.254011869430542, + "learning_rate": 1.8323227464292014e-05, + "loss": 0.2892, + "step": 435 + }, + { + "epoch": 1.3933386201427438, + "grad_norm": 1.352268934249878, + "learning_rate": 1.8067440777626853e-05, + "loss": 0.2844, + "step": 440 + }, + { + "epoch": 1.409199048374306, + "grad_norm": 1.3375798463821411, + "learning_rate": 1.781071971878587e-05, + "loss": 0.3114, + "step": 445 + }, + { + "epoch": 1.4250594766058684, + "grad_norm": 1.2673659324645996, + "learning_rate": 1.7553142487490488e-05, + "loss": 0.2847, + "step": 450 + }, + { + "epoch": 1.4409199048374306, + "grad_norm": 1.3944097757339478, + "learning_rate": 1.7294787544260573e-05, + "loss": 0.3226, + "step": 455 + }, + { + "epoch": 1.4567803330689928, + "grad_norm": 1.4634172916412354, + "learning_rate": 1.7035733586514568e-05, + "loss": 0.2547, + "step": 460 + }, + { + "epoch": 1.4726407613005552, + "grad_norm": 1.152276635169983, + "learning_rate": 1.6776059524597518e-05, + "loss": 0.2921, + "step": 465 + }, + { + "epoch": 1.4885011895321174, + "grad_norm": 1.3767081499099731, + "learning_rate": 1.6515844457744193e-05, + "loss": 0.2595, + "step": 470 + }, + { + "epoch": 1.5043616177636796, + "grad_norm": 1.3184919357299805, + "learning_rate": 1.6255167649984738e-05, + "loss": 0.2462, + "step": 475 + }, + { + "epoch": 1.5202220459952418, + "grad_norm": 1.268489956855774, + "learning_rate": 1.5994108506000005e-05, + "loss": 0.2731, + "step": 480 + }, + { + "epoch": 1.536082474226804, + "grad_norm": 1.2082581520080566, + "learning_rate": 1.57327465469342e-05, + "loss": 0.3269, + "step": 485 + }, + { + "epoch": 1.5519429024583664, + "grad_norm": 1.2798997163772583, + "learning_rate": 1.5471161386171925e-05, + "loss": 0.2676, + "step": 490 + }, + { + "epoch": 1.5678033306899286, + "grad_norm": 1.4968881607055664, + "learning_rate": 1.5209432705087183e-05, + "loss": 0.2788, + "step": 495 + }, + { + "epoch": 1.583663758921491, + "grad_norm": 1.4262897968292236, + "learning_rate": 1.494764022877165e-05, + "loss": 0.2404, + "step": 500 + }, + { + "epoch": 1.5995241871530532, + "grad_norm": 1.1628328561782837, + "learning_rate": 1.4685863701749648e-05, + "loss": 0.2427, + "step": 505 + }, + { + "epoch": 1.6153846153846154, + "grad_norm": 1.2132608890533447, + "learning_rate": 1.4424182863687201e-05, + "loss": 0.2313, + "step": 510 + }, + { + "epoch": 1.6312450436161776, + "grad_norm": 1.228310227394104, + "learning_rate": 1.4162677425102542e-05, + "loss": 0.2426, + "step": 515 + }, + { + "epoch": 1.6471054718477398, + "grad_norm": 1.2990258932113647, + "learning_rate": 1.3901427043085528e-05, + "loss": 0.2326, + "step": 520 + }, + { + "epoch": 1.662965900079302, + "grad_norm": 1.357536792755127, + "learning_rate": 1.36405112970333e-05, + "loss": 0.2038, + "step": 525 + }, + { + "epoch": 1.6788263283108644, + "grad_norm": 1.2959481477737427, + "learning_rate": 1.3380009664409656e-05, + "loss": 0.2892, + "step": 530 + }, + { + "epoch": 1.6946867565424266, + "grad_norm": 1.1696343421936035, + "learning_rate": 1.3120001496535434e-05, + "loss": 0.2359, + "step": 535 + }, + { + "epoch": 1.710547184773989, + "grad_norm": 1.674237847328186, + "learning_rate": 1.2860565994417327e-05, + "loss": 0.2168, + "step": 540 + }, + { + "epoch": 1.7264076130055512, + "grad_norm": 1.4041743278503418, + "learning_rate": 1.2601782184622479e-05, + "loss": 0.2296, + "step": 545 + }, + { + "epoch": 1.7422680412371134, + "grad_norm": 1.6142802238464355, + "learning_rate": 1.2343728895206252e-05, + "loss": 0.2274, + "step": 550 + }, + { + "epoch": 1.7581284694686756, + "grad_norm": 1.435827374458313, + "learning_rate": 1.2086484731700424e-05, + "loss": 0.2225, + "step": 555 + }, + { + "epoch": 1.7739888977002378, + "grad_norm": 1.4675534963607788, + "learning_rate": 1.1830128053169172e-05, + "loss": 0.2236, + "step": 560 + }, + { + "epoch": 1.7898493259318, + "grad_norm": 1.2981373071670532, + "learning_rate": 1.1574736948340163e-05, + "loss": 0.2185, + "step": 565 + }, + { + "epoch": 1.8057097541633624, + "grad_norm": 1.2527811527252197, + "learning_rate": 1.132038921181796e-05, + "loss": 0.1983, + "step": 570 + }, + { + "epoch": 1.8215701823949246, + "grad_norm": 1.319973111152649, + "learning_rate": 1.1067162320387032e-05, + "loss": 0.1879, + "step": 575 + }, + { + "epoch": 1.837430610626487, + "grad_norm": 1.2352094650268555, + "learning_rate": 1.0815133409411564e-05, + "loss": 0.1771, + "step": 580 + }, + { + "epoch": 1.8532910388580492, + "grad_norm": 1.255297064781189, + "learning_rate": 1.0564379249339306e-05, + "loss": 0.21, + "step": 585 + }, + { + "epoch": 1.8691514670896114, + "grad_norm": 1.3154605627059937, + "learning_rate": 1.031497622231651e-05, + "loss": 0.1958, + "step": 590 + }, + { + "epoch": 1.8850118953211736, + "grad_norm": 1.1625304222106934, + "learning_rate": 1.0067000298921251e-05, + "loss": 0.2204, + "step": 595 + }, + { + "epoch": 1.9008723235527358, + "grad_norm": 0.9641660451889038, + "learning_rate": 9.820527015021981e-06, + "loss": 0.1812, + "step": 600 + }, + { + "epoch": 1.916732751784298, + "grad_norm": 1.3147978782653809, + "learning_rate": 9.575631448768618e-06, + "loss": 0.174, + "step": 605 + }, + { + "epoch": 1.9325931800158604, + "grad_norm": 1.2045432329177856, + "learning_rate": 9.332388197722995e-06, + "loss": 0.1725, + "step": 610 + }, + { + "epoch": 1.9484536082474226, + "grad_norm": 1.0281227827072144, + "learning_rate": 9.090871356135733e-06, + "loss": 0.1643, + "step": 615 + }, + { + "epoch": 1.964314036478985, + "grad_norm": 1.1965527534484863, + "learning_rate": 8.851154492376408e-06, + "loss": 0.1865, + "step": 620 + }, + { + "epoch": 1.9801744647105473, + "grad_norm": 1.3370472192764282, + "learning_rate": 8.61331062652391e-06, + "loss": 0.2105, + "step": 625 + }, + { + "epoch": 1.9960348929421095, + "grad_norm": 1.170845627784729, + "learning_rate": 8.377412208123823e-06, + "loss": 0.1841, + "step": 630 + }, + { + "epoch": 2.0095162569389373, + "grad_norm": 1.1804206371307373, + "learning_rate": 8.143531094119591e-06, + "loss": 0.1725, + "step": 635 + }, + { + "epoch": 2.0253766851704995, + "grad_norm": 1.1565989255905151, + "learning_rate": 7.911738526964192e-06, + "loss": 0.1465, + "step": 640 + }, + { + "epoch": 2.0412371134020617, + "grad_norm": 1.1448748111724854, + "learning_rate": 7.682105112919007e-06, + "loss": 0.1283, + "step": 645 + }, + { + "epoch": 2.057097541633624, + "grad_norm": 1.0873507261276245, + "learning_rate": 7.454700800546474e-06, + "loss": 0.1566, + "step": 650 + }, + { + "epoch": 2.0729579698651865, + "grad_norm": 1.2714923620224, + "learning_rate": 7.229594859403049e-06, + "loss": 0.1582, + "step": 655 + }, + { + "epoch": 2.0888183980967487, + "grad_norm": 1.0228670835494995, + "learning_rate": 7.006855858939049e-06, + "loss": 0.1385, + "step": 660 + }, + { + "epoch": 2.104678826328311, + "grad_norm": 1.0523014068603516, + "learning_rate": 6.7865516476117475e-06, + "loss": 0.1308, + "step": 665 + }, + { + "epoch": 2.120539254559873, + "grad_norm": 1.3327562808990479, + "learning_rate": 6.568749332218045e-06, + "loss": 0.1508, + "step": 670 + }, + { + "epoch": 2.1363996827914353, + "grad_norm": 1.0790964365005493, + "learning_rate": 6.3535152574531025e-06, + "loss": 0.1248, + "step": 675 + }, + { + "epoch": 2.1522601110229975, + "grad_norm": 1.1325013637542725, + "learning_rate": 6.1409149857011175e-06, + "loss": 0.1316, + "step": 680 + }, + { + "epoch": 2.1681205392545597, + "grad_norm": 0.9746434092521667, + "learning_rate": 5.931013277064377e-06, + "loss": 0.1327, + "step": 685 + }, + { + "epoch": 2.1839809674861224, + "grad_norm": 1.3345223665237427, + "learning_rate": 5.72387406963669e-06, + "loss": 0.1354, + "step": 690 + }, + { + "epoch": 2.1998413957176846, + "grad_norm": 1.1683239936828613, + "learning_rate": 5.519560460027236e-06, + "loss": 0.1429, + "step": 695 + }, + { + "epoch": 2.2157018239492468, + "grad_norm": 1.2948179244995117, + "learning_rate": 5.318134684140739e-06, + "loss": 0.1667, + "step": 700 + }, + { + "epoch": 2.231562252180809, + "grad_norm": 1.026050329208374, + "learning_rate": 5.119658098219791e-06, + "loss": 0.1304, + "step": 705 + }, + { + "epoch": 2.247422680412371, + "grad_norm": 0.9528861045837402, + "learning_rate": 4.92419116015515e-06, + "loss": 0.1194, + "step": 710 + }, + { + "epoch": 2.2632831086439333, + "grad_norm": 1.184850811958313, + "learning_rate": 4.731793411069669e-06, + "loss": 0.132, + "step": 715 + }, + { + "epoch": 2.2791435368754955, + "grad_norm": 1.0486268997192383, + "learning_rate": 4.5425234571815335e-06, + "loss": 0.1143, + "step": 720 + }, + { + "epoch": 2.2950039651070577, + "grad_norm": 0.9157793521881104, + "learning_rate": 4.356438951952189e-06, + "loss": 0.1201, + "step": 725 + }, + { + "epoch": 2.31086439333862, + "grad_norm": 1.0619646310806274, + "learning_rate": 4.173596578524568e-06, + "loss": 0.1032, + "step": 730 + }, + { + "epoch": 2.3267248215701826, + "grad_norm": 0.9993194937705994, + "learning_rate": 3.994052032456853e-06, + "loss": 0.1059, + "step": 735 + }, + { + "epoch": 2.3425852498017448, + "grad_norm": 1.0271214246749878, + "learning_rate": 3.8178600047570675e-06, + "loss": 0.1363, + "step": 740 + }, + { + "epoch": 2.358445678033307, + "grad_norm": 0.9989938139915466, + "learning_rate": 3.645074165223656e-06, + "loss": 0.1226, + "step": 745 + }, + { + "epoch": 2.374306106264869, + "grad_norm": 1.3176790475845337, + "learning_rate": 3.475747146097153e-06, + "loss": 0.1097, + "step": 750 + }, + { + "epoch": 2.3901665344964313, + "grad_norm": 1.0843831300735474, + "learning_rate": 3.309930526027885e-06, + "loss": 0.1257, + "step": 755 + }, + { + "epoch": 2.4060269627279935, + "grad_norm": 1.0826454162597656, + "learning_rate": 3.1476748143646437e-06, + "loss": 0.0979, + "step": 760 + }, + { + "epoch": 2.4218873909595557, + "grad_norm": 1.171995759010315, + "learning_rate": 2.9890294357689994e-06, + "loss": 0.1047, + "step": 765 + }, + { + "epoch": 2.4377478191911184, + "grad_norm": 0.9295881986618042, + "learning_rate": 2.8340427151601036e-06, + "loss": 0.0983, + "step": 770 + }, + { + "epoch": 2.4536082474226806, + "grad_norm": 0.8085209131240845, + "learning_rate": 2.6827618629944394e-06, + "loss": 0.1035, + "step": 775 + }, + { + "epoch": 2.4694686756542428, + "grad_norm": 1.0635337829589844, + "learning_rate": 2.5352329608850783e-06, + "loss": 0.1067, + "step": 780 + }, + { + "epoch": 2.485329103885805, + "grad_norm": 0.8687992095947266, + "learning_rate": 2.3915009475647364e-06, + "loss": 0.1083, + "step": 785 + }, + { + "epoch": 2.501189532117367, + "grad_norm": 0.9182899594306946, + "learning_rate": 2.2516096051970438e-06, + "loss": 0.1015, + "step": 790 + }, + { + "epoch": 2.5170499603489294, + "grad_norm": 0.9503368139266968, + "learning_rate": 2.1156015460400333e-06, + "loss": 0.1129, + "step": 795 + }, + { + "epoch": 2.5329103885804916, + "grad_norm": 0.8351601958274841, + "learning_rate": 1.9835181994660754e-06, + "loss": 0.0983, + "step": 800 + }, + { + "epoch": 2.5487708168120538, + "grad_norm": 0.8500259518623352, + "learning_rate": 1.8553997993420495e-06, + "loss": 0.1076, + "step": 805 + }, + { + "epoch": 2.564631245043616, + "grad_norm": 0.878666341304779, + "learning_rate": 1.731285371773741e-06, + "loss": 0.1078, + "step": 810 + }, + { + "epoch": 2.5804916732751786, + "grad_norm": 0.9775558114051819, + "learning_rate": 1.6112127232181163e-06, + "loss": 0.1058, + "step": 815 + }, + { + "epoch": 2.596352101506741, + "grad_norm": 0.8739961385726929, + "learning_rate": 1.4952184289670974e-06, + "loss": 0.0922, + "step": 820 + }, + { + "epoch": 2.612212529738303, + "grad_norm": 0.8368566036224365, + "learning_rate": 1.3833378220063713e-06, + "loss": 0.1111, + "step": 825 + }, + { + "epoch": 2.628072957969865, + "grad_norm": 0.7680177092552185, + "learning_rate": 1.2756049822526288e-06, + "loss": 0.0942, + "step": 830 + }, + { + "epoch": 2.6439333862014274, + "grad_norm": 0.9059642553329468, + "learning_rate": 1.172052726172494e-06, + "loss": 0.1159, + "step": 835 + }, + { + "epoch": 2.6597938144329896, + "grad_norm": 0.8205581307411194, + "learning_rate": 1.0727125967862972e-06, + "loss": 0.1025, + "step": 840 + }, + { + "epoch": 2.675654242664552, + "grad_norm": 0.8071274161338806, + "learning_rate": 9.776148540597834e-07, + "loss": 0.099, + "step": 845 + }, + { + "epoch": 2.6915146708961144, + "grad_norm": 0.8198668956756592, + "learning_rate": 8.867884656866181e-07, + "loss": 0.0977, + "step": 850 + }, + { + "epoch": 2.7073750991276766, + "grad_norm": 0.7336828112602234, + "learning_rate": 8.002610982645558e-07, + "loss": 0.0875, + "step": 855 + }, + { + "epoch": 2.723235527359239, + "grad_norm": 1.188350796699524, + "learning_rate": 7.180591088679212e-07, + "loss": 0.0944, + "step": 860 + }, + { + "epoch": 2.739095955590801, + "grad_norm": 0.7697141766548157, + "learning_rate": 6.402075370189914e-07, + "loss": 0.1012, + "step": 865 + }, + { + "epoch": 2.754956383822363, + "grad_norm": 0.8623362183570862, + "learning_rate": 5.667300970607192e-07, + "loss": 0.1151, + "step": 870 + }, + { + "epoch": 2.7708168120539254, + "grad_norm": 0.8793305158615112, + "learning_rate": 4.976491709331094e-07, + "loss": 0.0968, + "step": 875 + }, + { + "epoch": 2.7866772402854876, + "grad_norm": 0.7547869682312012, + "learning_rate": 4.3298580135546053e-07, + "loss": 0.095, + "step": 880 + }, + { + "epoch": 2.8025376685170498, + "grad_norm": 0.8234273791313171, + "learning_rate": 3.7275968541655104e-07, + "loss": 0.0989, + "step": 885 + }, + { + "epoch": 2.818398096748612, + "grad_norm": 0.7951599359512329, + "learning_rate": 3.16989168574725e-07, + "loss": 0.1025, + "step": 890 + }, + { + "epoch": 2.8342585249801746, + "grad_norm": 0.9393948912620544, + "learning_rate": 2.6569123906967083e-07, + "loss": 0.1132, + "step": 895 + }, + { + "epoch": 2.850118953211737, + "grad_norm": 0.8398211598396301, + "learning_rate": 2.1888152274764872e-07, + "loss": 0.0909, + "step": 900 + }, + { + "epoch": 2.865979381443299, + "grad_norm": 0.6961729526519775, + "learning_rate": 1.7657427830170824e-07, + "loss": 0.111, + "step": 905 + }, + { + "epoch": 2.881839809674861, + "grad_norm": 0.730242133140564, + "learning_rate": 1.3878239292834604e-07, + "loss": 0.1055, + "step": 910 + }, + { + "epoch": 2.8977002379064234, + "grad_norm": 0.6813839077949524, + "learning_rate": 1.0551737840194587e-07, + "loss": 0.0878, + "step": 915 + }, + { + "epoch": 2.9135606661379856, + "grad_norm": 0.8640697002410889, + "learning_rate": 7.67893675681769e-08, + "loss": 0.0977, + "step": 920 + }, + { + "epoch": 2.9294210943695482, + "grad_norm": 0.7399241924285889, + "learning_rate": 5.260711125743445e-08, + "loss": 0.1017, + "step": 925 + }, + { + "epoch": 2.9452815226011104, + "grad_norm": 0.7473828196525574, + "learning_rate": 3.2977975619250536e-08, + "loss": 0.0983, + "step": 930 + }, + { + "epoch": 2.9611419508326726, + "grad_norm": 0.6582476496696472, + "learning_rate": 1.7907939878490376e-08, + "loss": 0.1056, + "step": 935 + }, + { + "epoch": 2.977002379064235, + "grad_norm": 0.7286701798439026, + "learning_rate": 7.401594514026e-09, + "loss": 0.1057, + "step": 940 + }, + { + "epoch": 2.992862807295797, + "grad_norm": 0.8410643935203552, + "learning_rate": 1.4621398604364179e-09, + "loss": 0.0996, + "step": 945 + }, + { + "epoch": 3.0, + "step": 948, + "total_flos": 1.2070794603192648e+18, + "train_loss": 0.45383420935537244, + "train_runtime": 973.654, + "train_samples_per_second": 31.074, + "train_steps_per_second": 0.974 + } + ], + "logging_steps": 5, + "max_steps": 948, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2070794603192648e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b941553b0c2f5d1af7b02c4214353d6c8b01b3e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/2_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f5c21faf876c41233037db785c3069a4a880ca74ea37a5e414dda637a611d59 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..65b85271e2d771d19c61e21bce53bca3d62aa419 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 3_128_e3_3e-5 + results: [] +--- + + + +# 3_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ec4c0efdf7ddb53e56124a3f2902411eb43d0b6 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "q_proj", + "down_proj", + "gate_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86fe4002a8458f0180fb0f9e57c2577565d560b7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc505f9ee872e66d843f823f59d97b0fc8f264c07829557bd351dbfa1e1cc59c +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd776dd4d66904b6c9aa44f34254bcd7ce7b478d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3890291165057516e+18, + "train_loss": 0.433336251311832, + "train_runtime": 1099.7913, + "train_samples": 11694, + "train_samples_per_second": 31.899, + "train_steps_per_second": 0.998 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd776dd4d66904b6c9aa44f34254bcd7ce7b478d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3890291165057516e+18, + "train_loss": 0.433336251311832, + "train_runtime": 1099.7913, + "train_samples": 11694, + "train_samples_per_second": 31.899, + "train_steps_per_second": 0.998 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..430072889bf7d454cca38e4ef49e92c53748fc88 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1576 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1098, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013679890560875513, + "grad_norm": 0.6260705590248108, + "learning_rate": 2.181818181818182e-06, + "loss": 1.5501, + "step": 5 + }, + { + "epoch": 0.027359781121751026, + "grad_norm": 0.6564420461654663, + "learning_rate": 4.90909090909091e-06, + "loss": 1.5529, + "step": 10 + }, + { + "epoch": 0.04103967168262654, + "grad_norm": 0.5666828155517578, + "learning_rate": 7.636363636363636e-06, + "loss": 1.563, + "step": 15 + }, + { + "epoch": 0.05471956224350205, + "grad_norm": 0.49279728531837463, + "learning_rate": 1.0363636363636364e-05, + "loss": 1.5113, + "step": 20 + }, + { + "epoch": 0.06839945280437756, + "grad_norm": 0.5169044137001038, + "learning_rate": 1.309090909090909e-05, + "loss": 1.4887, + "step": 25 + }, + { + "epoch": 0.08207934336525308, + "grad_norm": 0.5103289484977722, + "learning_rate": 1.5818181818181818e-05, + "loss": 1.4783, + "step": 30 + }, + { + "epoch": 0.09575923392612859, + "grad_norm": 0.5682989954948425, + "learning_rate": 1.8545454545454545e-05, + "loss": 1.4329, + "step": 35 + }, + { + "epoch": 0.1094391244870041, + "grad_norm": 0.5416489243507385, + "learning_rate": 2.1272727272727273e-05, + "loss": 1.4241, + "step": 40 + }, + { + "epoch": 0.12311901504787962, + "grad_norm": 0.507831335067749, + "learning_rate": 2.4e-05, + "loss": 1.4256, + "step": 45 + }, + { + "epoch": 0.13679890560875513, + "grad_norm": 0.6066713333129883, + "learning_rate": 2.6727272727272728e-05, + "loss": 1.4186, + "step": 50 + }, + { + "epoch": 0.15047879616963064, + "grad_norm": 0.6545333862304688, + "learning_rate": 2.9454545454545456e-05, + "loss": 1.3518, + "step": 55 + }, + { + "epoch": 0.16415868673050615, + "grad_norm": 0.5629597306251526, + "learning_rate": 2.9998911302762293e-05, + "loss": 1.3487, + "step": 60 + }, + { + "epoch": 0.17783857729138167, + "grad_norm": 0.7042645812034607, + "learning_rate": 2.9994488741083514e-05, + "loss": 1.3303, + "step": 65 + }, + { + "epoch": 0.19151846785225718, + "grad_norm": 0.5182560086250305, + "learning_rate": 2.9986665273697548e-05, + "loss": 1.3477, + "step": 70 + }, + { + "epoch": 0.2051983584131327, + "grad_norm": 0.7695909738540649, + "learning_rate": 2.997544267504801e-05, + "loss": 1.2603, + "step": 75 + }, + { + "epoch": 0.2188782489740082, + "grad_norm": 0.6369842886924744, + "learning_rate": 2.9960823490536772e-05, + "loss": 1.2939, + "step": 80 + }, + { + "epoch": 0.23255813953488372, + "grad_norm": 0.8093236088752747, + "learning_rate": 2.9942811035946656e-05, + "loss": 1.268, + "step": 85 + }, + { + "epoch": 0.24623803009575923, + "grad_norm": 0.6286530494689941, + "learning_rate": 2.9921409396689347e-05, + "loss": 1.2508, + "step": 90 + }, + { + "epoch": 0.25991792065663477, + "grad_norm": 0.6930716037750244, + "learning_rate": 2.9896623426878805e-05, + "loss": 1.2491, + "step": 95 + }, + { + "epoch": 0.27359781121751026, + "grad_norm": 0.7019796967506409, + "learning_rate": 2.9868458748230293e-05, + "loss": 1.1728, + "step": 100 + }, + { + "epoch": 0.2872777017783858, + "grad_norm": 0.8785877823829651, + "learning_rate": 2.983692174878531e-05, + "loss": 1.152, + "step": 105 + }, + { + "epoch": 0.3009575923392613, + "grad_norm": 0.7861308455467224, + "learning_rate": 2.980201958146272e-05, + "loss": 1.1293, + "step": 110 + }, + { + "epoch": 0.3146374829001368, + "grad_norm": 0.8343217372894287, + "learning_rate": 2.9763760162436405e-05, + "loss": 1.1243, + "step": 115 + }, + { + "epoch": 0.3283173734610123, + "grad_norm": 0.8703253865242004, + "learning_rate": 2.9722152169339765e-05, + "loss": 1.1423, + "step": 120 + }, + { + "epoch": 0.34199726402188785, + "grad_norm": 0.882481038570404, + "learning_rate": 2.967720503929759e-05, + "loss": 1.0159, + "step": 125 + }, + { + "epoch": 0.35567715458276333, + "grad_norm": 0.9939788579940796, + "learning_rate": 2.962892896678557e-05, + "loss": 1.0412, + "step": 130 + }, + { + "epoch": 0.3693570451436389, + "grad_norm": 0.8989612460136414, + "learning_rate": 2.9577334901318115e-05, + "loss": 1.0502, + "step": 135 + }, + { + "epoch": 0.38303693570451436, + "grad_norm": 0.9308570027351379, + "learning_rate": 2.952243454496488e-05, + "loss": 1.0389, + "step": 140 + }, + { + "epoch": 0.3967168262653899, + "grad_norm": 0.8815864324569702, + "learning_rate": 2.9464240349696625e-05, + "loss": 0.9782, + "step": 145 + }, + { + "epoch": 0.4103967168262654, + "grad_norm": 0.863875150680542, + "learning_rate": 2.9402765514560955e-05, + "loss": 0.9241, + "step": 150 + }, + { + "epoch": 0.4240766073871409, + "grad_norm": 0.9092949032783508, + "learning_rate": 2.9338023982688657e-05, + "loss": 0.9351, + "step": 155 + }, + { + "epoch": 0.4377564979480164, + "grad_norm": 0.9547214508056641, + "learning_rate": 2.9270030438131263e-05, + "loss": 0.8854, + "step": 160 + }, + { + "epoch": 0.45143638850889195, + "grad_norm": 0.8979869484901428, + "learning_rate": 2.9198800302530532e-05, + "loss": 0.8879, + "step": 165 + }, + { + "epoch": 0.46511627906976744, + "grad_norm": 1.308546781539917, + "learning_rate": 2.912434973162067e-05, + "loss": 0.9119, + "step": 170 + }, + { + "epoch": 0.478796169630643, + "grad_norm": 1.0112230777740479, + "learning_rate": 2.904669561156404e-05, + "loss": 0.9265, + "step": 175 + }, + { + "epoch": 0.49247606019151846, + "grad_norm": 1.0116804838180542, + "learning_rate": 2.8965855555121216e-05, + "loss": 0.8586, + "step": 180 + }, + { + "epoch": 0.506155950752394, + "grad_norm": 0.9462437629699707, + "learning_rate": 2.8881847897656224e-05, + "loss": 0.8745, + "step": 185 + }, + { + "epoch": 0.5198358413132695, + "grad_norm": 1.2495266199111938, + "learning_rate": 2.879469169297787e-05, + "loss": 0.8613, + "step": 190 + }, + { + "epoch": 0.533515731874145, + "grad_norm": 0.9954858422279358, + "learning_rate": 2.870440670901816e-05, + "loss": 0.858, + "step": 195 + }, + { + "epoch": 0.5471956224350205, + "grad_norm": 1.0665680170059204, + "learning_rate": 2.8611013423348727e-05, + "loss": 0.7902, + "step": 200 + }, + { + "epoch": 0.560875512995896, + "grad_norm": 0.9302067756652832, + "learning_rate": 2.8514533018536286e-05, + "loss": 0.8033, + "step": 205 + }, + { + "epoch": 0.5745554035567716, + "grad_norm": 1.2291796207427979, + "learning_rate": 2.841498737733824e-05, + "loss": 0.7738, + "step": 210 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.0912837982177734, + "learning_rate": 2.8312399077739407e-05, + "loss": 0.7717, + "step": 215 + }, + { + "epoch": 0.6019151846785226, + "grad_norm": 1.2541359663009644, + "learning_rate": 2.8206791387831136e-05, + "loss": 0.7567, + "step": 220 + }, + { + "epoch": 0.615595075239398, + "grad_norm": 1.1442549228668213, + "learning_rate": 2.8098188260533818e-05, + "loss": 0.781, + "step": 225 + }, + { + "epoch": 0.6292749658002736, + "grad_norm": 1.0966084003448486, + "learning_rate": 2.7986614328164168e-05, + "loss": 0.7035, + "step": 230 + }, + { + "epoch": 0.6429548563611491, + "grad_norm": 1.1035778522491455, + "learning_rate": 2.7872094896848307e-05, + "loss": 0.676, + "step": 235 + }, + { + "epoch": 0.6566347469220246, + "grad_norm": 1.3320432901382446, + "learning_rate": 2.7754655940782117e-05, + "loss": 0.675, + "step": 240 + }, + { + "epoch": 0.6703146374829001, + "grad_norm": 1.1223951578140259, + "learning_rate": 2.763432409633998e-05, + "loss": 0.6793, + "step": 245 + }, + { + "epoch": 0.6839945280437757, + "grad_norm": 1.1442415714263916, + "learning_rate": 2.751112665603341e-05, + "loss": 0.6791, + "step": 250 + }, + { + "epoch": 0.6976744186046512, + "grad_norm": 1.1760120391845703, + "learning_rate": 2.7385091562320808e-05, + "loss": 0.6825, + "step": 255 + }, + { + "epoch": 0.7113543091655267, + "grad_norm": 1.0478227138519287, + "learning_rate": 2.7256247401269814e-05, + "loss": 0.6332, + "step": 260 + }, + { + "epoch": 0.7250341997264022, + "grad_norm": 1.3080811500549316, + "learning_rate": 2.7124623396073715e-05, + "loss": 0.6578, + "step": 265 + }, + { + "epoch": 0.7387140902872777, + "grad_norm": 1.1674227714538574, + "learning_rate": 2.6990249400423305e-05, + "loss": 0.6772, + "step": 270 + }, + { + "epoch": 0.7523939808481532, + "grad_norm": 1.081085205078125, + "learning_rate": 2.685315589173576e-05, + "loss": 0.6589, + "step": 275 + }, + { + "epoch": 0.7660738714090287, + "grad_norm": 1.1427956819534302, + "learning_rate": 2.6713373964242043e-05, + "loss": 0.6739, + "step": 280 + }, + { + "epoch": 0.7797537619699042, + "grad_norm": 1.3702718019485474, + "learning_rate": 2.6570935321934417e-05, + "loss": 0.6108, + "step": 285 + }, + { + "epoch": 0.7934336525307798, + "grad_norm": 1.1371631622314453, + "learning_rate": 2.642587227137564e-05, + "loss": 0.5994, + "step": 290 + }, + { + "epoch": 0.8071135430916553, + "grad_norm": 1.1724711656570435, + "learning_rate": 2.6278217714371496e-05, + "loss": 0.5854, + "step": 295 + }, + { + "epoch": 0.8207934336525308, + "grad_norm": 1.2343699932098389, + "learning_rate": 2.612800514050836e-05, + "loss": 0.5536, + "step": 300 + }, + { + "epoch": 0.8344733242134063, + "grad_norm": 1.1748708486557007, + "learning_rate": 2.597526861955736e-05, + "loss": 0.6145, + "step": 305 + }, + { + "epoch": 0.8481532147742818, + "grad_norm": 1.202589988708496, + "learning_rate": 2.582004279374704e-05, + "loss": 0.6242, + "step": 310 + }, + { + "epoch": 0.8618331053351573, + "grad_norm": 1.2398837804794312, + "learning_rate": 2.5662362869906123e-05, + "loss": 0.5576, + "step": 315 + }, + { + "epoch": 0.8755129958960328, + "grad_norm": 1.2692333459854126, + "learning_rate": 2.5502264611478238e-05, + "loss": 0.5629, + "step": 320 + }, + { + "epoch": 0.8891928864569083, + "grad_norm": 1.1885793209075928, + "learning_rate": 2.5339784330410413e-05, + "loss": 0.4918, + "step": 325 + }, + { + "epoch": 0.9028727770177839, + "grad_norm": 1.2436736822128296, + "learning_rate": 2.5174958878917135e-05, + "loss": 0.4843, + "step": 330 + }, + { + "epoch": 0.9165526675786594, + "grad_norm": 1.1898037195205688, + "learning_rate": 2.500782564112188e-05, + "loss": 0.5178, + "step": 335 + }, + { + "epoch": 0.9302325581395349, + "grad_norm": 1.3402459621429443, + "learning_rate": 2.4838422524578027e-05, + "loss": 0.5042, + "step": 340 + }, + { + "epoch": 0.9439124487004104, + "grad_norm": 1.3176991939544678, + "learning_rate": 2.4666787951671013e-05, + "loss": 0.5326, + "step": 345 + }, + { + "epoch": 0.957592339261286, + "grad_norm": 1.2730348110198975, + "learning_rate": 2.4492960850903757e-05, + "loss": 0.5071, + "step": 350 + }, + { + "epoch": 0.9712722298221614, + "grad_norm": 1.427442193031311, + "learning_rate": 2.4316980648067225e-05, + "loss": 0.5054, + "step": 355 + }, + { + "epoch": 0.9849521203830369, + "grad_norm": 1.2667721509933472, + "learning_rate": 2.4138887257298317e-05, + "loss": 0.4561, + "step": 360 + }, + { + "epoch": 0.9986320109439124, + "grad_norm": 1.2596789598464966, + "learning_rate": 2.3958721072026893e-05, + "loss": 0.4779, + "step": 365 + }, + { + "epoch": 1.0109439124487003, + "grad_norm": 1.291460394859314, + "learning_rate": 2.3776522955814094e-05, + "loss": 0.4301, + "step": 370 + }, + { + "epoch": 1.024623803009576, + "grad_norm": 1.292645812034607, + "learning_rate": 2.3592334233084115e-05, + "loss": 0.3587, + "step": 375 + }, + { + "epoch": 1.0383036935704515, + "grad_norm": 1.0824459791183472, + "learning_rate": 2.3406196679751376e-05, + "loss": 0.4296, + "step": 380 + }, + { + "epoch": 1.051983584131327, + "grad_norm": 1.1791083812713623, + "learning_rate": 2.3218152513745306e-05, + "loss": 0.4046, + "step": 385 + }, + { + "epoch": 1.0656634746922025, + "grad_norm": 1.3448339700698853, + "learning_rate": 2.3028244385434863e-05, + "loss": 0.3574, + "step": 390 + }, + { + "epoch": 1.079343365253078, + "grad_norm": 1.3208296298980713, + "learning_rate": 2.283651536795504e-05, + "loss": 0.4083, + "step": 395 + }, + { + "epoch": 1.0930232558139534, + "grad_norm": 1.1809402704238892, + "learning_rate": 2.2643008947437368e-05, + "loss": 0.3893, + "step": 400 + }, + { + "epoch": 1.106703146374829, + "grad_norm": 1.5534876585006714, + "learning_rate": 2.244776901314685e-05, + "loss": 0.3672, + "step": 405 + }, + { + "epoch": 1.1203830369357046, + "grad_norm": 1.0918272733688354, + "learning_rate": 2.22508398475274e-05, + "loss": 0.3427, + "step": 410 + }, + { + "epoch": 1.13406292749658, + "grad_norm": 1.2426403760910034, + "learning_rate": 2.2052266116158157e-05, + "loss": 0.3507, + "step": 415 + }, + { + "epoch": 1.1477428180574556, + "grad_norm": 1.2878193855285645, + "learning_rate": 2.185209285762281e-05, + "loss": 0.3732, + "step": 420 + }, + { + "epoch": 1.161422708618331, + "grad_norm": 1.1829051971435547, + "learning_rate": 2.165036547329444e-05, + "loss": 0.347, + "step": 425 + }, + { + "epoch": 1.1751025991792066, + "grad_norm": 1.3631337881088257, + "learning_rate": 2.144712971703799e-05, + "loss": 0.4179, + "step": 430 + }, + { + "epoch": 1.188782489740082, + "grad_norm": 1.7393062114715576, + "learning_rate": 2.1242431684832802e-05, + "loss": 0.3909, + "step": 435 + }, + { + "epoch": 1.2024623803009575, + "grad_norm": 1.3592051267623901, + "learning_rate": 2.103631780431759e-05, + "loss": 0.3337, + "step": 440 + }, + { + "epoch": 1.216142270861833, + "grad_norm": 1.1530839204788208, + "learning_rate": 2.0828834824260168e-05, + "loss": 0.3721, + "step": 445 + }, + { + "epoch": 1.2298221614227085, + "grad_norm": 1.3716259002685547, + "learning_rate": 2.062002980395433e-05, + "loss": 0.3857, + "step": 450 + }, + { + "epoch": 1.2435020519835842, + "grad_norm": 1.2422486543655396, + "learning_rate": 2.0409950102546334e-05, + "loss": 0.2808, + "step": 455 + }, + { + "epoch": 1.2571819425444597, + "grad_norm": 1.3426320552825928, + "learning_rate": 2.0198643368293328e-05, + "loss": 0.3151, + "step": 460 + }, + { + "epoch": 1.2708618331053352, + "grad_norm": 1.2777618169784546, + "learning_rate": 1.998615752775626e-05, + "loss": 0.3228, + "step": 465 + }, + { + "epoch": 1.2845417236662107, + "grad_norm": 1.2025511264801025, + "learning_rate": 1.9772540774929624e-05, + "loss": 0.2867, + "step": 470 + }, + { + "epoch": 1.2982216142270862, + "grad_norm": 1.3845206499099731, + "learning_rate": 1.9557841560310556e-05, + "loss": 0.2986, + "step": 475 + }, + { + "epoch": 1.3119015047879616, + "grad_norm": 1.2037464380264282, + "learning_rate": 1.934210857990977e-05, + "loss": 0.2731, + "step": 480 + }, + { + "epoch": 1.3255813953488373, + "grad_norm": 1.3632595539093018, + "learning_rate": 1.912539076420678e-05, + "loss": 0.3539, + "step": 485 + }, + { + "epoch": 1.3392612859097128, + "grad_norm": 1.1612389087677002, + "learning_rate": 1.890773726705198e-05, + "loss": 0.2928, + "step": 490 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 1.3170897960662842, + "learning_rate": 1.8689197454518034e-05, + "loss": 0.3335, + "step": 495 + }, + { + "epoch": 1.3666210670314638, + "grad_norm": 1.2885876893997192, + "learning_rate": 1.846982089370312e-05, + "loss": 0.3125, + "step": 500 + }, + { + "epoch": 1.3803009575923393, + "grad_norm": 1.3305988311767578, + "learning_rate": 1.824965734148863e-05, + "loss": 0.3267, + "step": 505 + }, + { + "epoch": 1.3939808481532148, + "grad_norm": 1.2240204811096191, + "learning_rate": 1.8028756733253758e-05, + "loss": 0.273, + "step": 510 + }, + { + "epoch": 1.4076607387140903, + "grad_norm": 1.1328290700912476, + "learning_rate": 1.7807169171549677e-05, + "loss": 0.3364, + "step": 515 + }, + { + "epoch": 1.4213406292749657, + "grad_norm": 1.4519784450531006, + "learning_rate": 1.7584944914735713e-05, + "loss": 0.2986, + "step": 520 + }, + { + "epoch": 1.4350205198358412, + "grad_norm": 1.1849297285079956, + "learning_rate": 1.7362134365580268e-05, + "loss": 0.2626, + "step": 525 + }, + { + "epoch": 1.4487004103967167, + "grad_norm": 1.3708363771438599, + "learning_rate": 1.7138788059828935e-05, + "loss": 0.2612, + "step": 530 + }, + { + "epoch": 1.4623803009575924, + "grad_norm": 1.258382797241211, + "learning_rate": 1.6914956654742454e-05, + "loss": 0.2858, + "step": 535 + }, + { + "epoch": 1.476060191518468, + "grad_norm": 1.335835337638855, + "learning_rate": 1.6690690917607138e-05, + "loss": 0.2923, + "step": 540 + }, + { + "epoch": 1.4897400820793434, + "grad_norm": 1.3023345470428467, + "learning_rate": 1.6466041714220316e-05, + "loss": 0.2622, + "step": 545 + }, + { + "epoch": 1.5034199726402189, + "grad_norm": 1.2181967496871948, + "learning_rate": 1.6241059997353442e-05, + "loss": 0.2745, + "step": 550 + }, + { + "epoch": 1.5170998632010944, + "grad_norm": 1.2405407428741455, + "learning_rate": 1.6015796795195485e-05, + "loss": 0.2705, + "step": 555 + }, + { + "epoch": 1.53077975376197, + "grad_norm": 1.3937187194824219, + "learning_rate": 1.5790303199779194e-05, + "loss": 0.2387, + "step": 560 + }, + { + "epoch": 1.5444596443228455, + "grad_norm": 1.3076422214508057, + "learning_rate": 1.5564630355392902e-05, + "loss": 0.2692, + "step": 565 + }, + { + "epoch": 1.558139534883721, + "grad_norm": 1.387677788734436, + "learning_rate": 1.5338829446980464e-05, + "loss": 0.2507, + "step": 570 + }, + { + "epoch": 1.5718194254445965, + "grad_norm": 1.3450011014938354, + "learning_rate": 1.5112951688532002e-05, + "loss": 0.2637, + "step": 575 + }, + { + "epoch": 1.585499316005472, + "grad_norm": 1.5064235925674438, + "learning_rate": 1.4887048311468002e-05, + "loss": 0.2653, + "step": 580 + }, + { + "epoch": 1.5991792065663475, + "grad_norm": 1.285598635673523, + "learning_rate": 1.4661170553019537e-05, + "loss": 0.2925, + "step": 585 + }, + { + "epoch": 1.612859097127223, + "grad_norm": 1.2347300052642822, + "learning_rate": 1.4435369644607104e-05, + "loss": 0.2324, + "step": 590 + }, + { + "epoch": 1.6265389876880985, + "grad_norm": 1.3028322458267212, + "learning_rate": 1.4209696800220807e-05, + "loss": 0.2561, + "step": 595 + }, + { + "epoch": 1.640218878248974, + "grad_norm": 1.269512414932251, + "learning_rate": 1.3984203204804517e-05, + "loss": 0.2399, + "step": 600 + }, + { + "epoch": 1.6538987688098494, + "grad_norm": 1.1818124055862427, + "learning_rate": 1.3758940002646562e-05, + "loss": 0.2264, + "step": 605 + }, + { + "epoch": 1.667578659370725, + "grad_norm": 1.247979760169983, + "learning_rate": 1.3533958285779687e-05, + "loss": 0.207, + "step": 610 + }, + { + "epoch": 1.6812585499316004, + "grad_norm": 1.0938302278518677, + "learning_rate": 1.3309309082392864e-05, + "loss": 0.2205, + "step": 615 + }, + { + "epoch": 1.694938440492476, + "grad_norm": 1.2134047746658325, + "learning_rate": 1.3085043345257553e-05, + "loss": 0.2047, + "step": 620 + }, + { + "epoch": 1.7086183310533516, + "grad_norm": 1.1780530214309692, + "learning_rate": 1.2861211940171067e-05, + "loss": 0.2135, + "step": 625 + }, + { + "epoch": 1.722298221614227, + "grad_norm": 1.7106276750564575, + "learning_rate": 1.2637865634419735e-05, + "loss": 0.2174, + "step": 630 + }, + { + "epoch": 1.7359781121751026, + "grad_norm": 1.2476404905319214, + "learning_rate": 1.2415055085264289e-05, + "loss": 0.2161, + "step": 635 + }, + { + "epoch": 1.7496580027359783, + "grad_norm": 1.165145754814148, + "learning_rate": 1.2192830828450327e-05, + "loss": 0.2033, + "step": 640 + }, + { + "epoch": 1.7633378932968538, + "grad_norm": 1.2977901697158813, + "learning_rate": 1.1971243266746243e-05, + "loss": 0.2091, + "step": 645 + }, + { + "epoch": 1.7770177838577292, + "grad_norm": 1.0728585720062256, + "learning_rate": 1.175034265851137e-05, + "loss": 0.219, + "step": 650 + }, + { + "epoch": 1.7906976744186047, + "grad_norm": 1.4513641595840454, + "learning_rate": 1.1530179106296881e-05, + "loss": 0.2058, + "step": 655 + }, + { + "epoch": 1.8043775649794802, + "grad_norm": 1.2728991508483887, + "learning_rate": 1.131080254548197e-05, + "loss": 0.2127, + "step": 660 + }, + { + "epoch": 1.8180574555403557, + "grad_norm": 1.1003203392028809, + "learning_rate": 1.1092262732948017e-05, + "loss": 0.1881, + "step": 665 + }, + { + "epoch": 1.8317373461012312, + "grad_norm": 1.1751350164413452, + "learning_rate": 1.0874609235793222e-05, + "loss": 0.1827, + "step": 670 + }, + { + "epoch": 1.8454172366621067, + "grad_norm": 0.9615784287452698, + "learning_rate": 1.0657891420090236e-05, + "loss": 0.1743, + "step": 675 + }, + { + "epoch": 1.8590971272229821, + "grad_norm": 1.1404980421066284, + "learning_rate": 1.0442158439689444e-05, + "loss": 0.1709, + "step": 680 + }, + { + "epoch": 1.8727770177838576, + "grad_norm": 1.2519264221191406, + "learning_rate": 1.0227459225070379e-05, + "loss": 0.1748, + "step": 685 + }, + { + "epoch": 1.8864569083447331, + "grad_norm": 1.1060410737991333, + "learning_rate": 1.0013842472243742e-05, + "loss": 0.156, + "step": 690 + }, + { + "epoch": 1.9001367989056086, + "grad_norm": 1.0037962198257446, + "learning_rate": 9.801356631706676e-06, + "loss": 0.1659, + "step": 695 + }, + { + "epoch": 1.9138166894664843, + "grad_norm": 1.1736907958984375, + "learning_rate": 9.590049897453668e-06, + "loss": 0.1661, + "step": 700 + }, + { + "epoch": 1.9274965800273598, + "grad_norm": 1.2438607215881348, + "learning_rate": 9.379970196045672e-06, + "loss": 0.1626, + "step": 705 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.2652961015701294, + "learning_rate": 9.171165175739832e-06, + "loss": 0.1795, + "step": 710 + }, + { + "epoch": 1.9548563611491108, + "grad_norm": 1.3451100587844849, + "learning_rate": 8.96368219568241e-06, + "loss": 0.1819, + "step": 715 + }, + { + "epoch": 1.9685362517099865, + "grad_norm": 1.0832020044326782, + "learning_rate": 8.7575683151672e-06, + "loss": 0.1668, + "step": 720 + }, + { + "epoch": 1.982216142270862, + "grad_norm": 1.1644564867019653, + "learning_rate": 8.552870282962012e-06, + "loss": 0.1423, + "step": 725 + }, + { + "epoch": 1.9958960328317374, + "grad_norm": 1.1190474033355713, + "learning_rate": 8.349634526705558e-06, + "loss": 0.1561, + "step": 730 + }, + { + "epoch": 2.008207934336525, + "grad_norm": 1.2087862491607666, + "learning_rate": 8.147907142377198e-06, + "loss": 0.1509, + "step": 735 + }, + { + "epoch": 2.0218878248974006, + "grad_norm": 1.0958341360092163, + "learning_rate": 7.947733883841847e-06, + "loss": 0.1241, + "step": 740 + }, + { + "epoch": 2.0355677154582765, + "grad_norm": 0.9743949174880981, + "learning_rate": 7.749160152472603e-06, + "loss": 0.1562, + "step": 745 + }, + { + "epoch": 2.049247606019152, + "grad_norm": 1.1121829748153687, + "learning_rate": 7.552230986853153e-06, + "loss": 0.1304, + "step": 750 + }, + { + "epoch": 2.0629274965800275, + "grad_norm": 1.0818952322006226, + "learning_rate": 7.35699105256263e-06, + "loss": 0.1107, + "step": 755 + }, + { + "epoch": 2.076607387140903, + "grad_norm": 1.1593421697616577, + "learning_rate": 7.1634846320449625e-06, + "loss": 0.1304, + "step": 760 + }, + { + "epoch": 2.0902872777017785, + "grad_norm": 0.9990052580833435, + "learning_rate": 6.971755614565131e-06, + "loss": 0.1157, + "step": 765 + }, + { + "epoch": 2.103967168262654, + "grad_norm": 1.163517951965332, + "learning_rate": 6.781847486254698e-06, + "loss": 0.1212, + "step": 770 + }, + { + "epoch": 2.1176470588235294, + "grad_norm": 0.8749842643737793, + "learning_rate": 6.593803320248625e-06, + "loss": 0.1286, + "step": 775 + }, + { + "epoch": 2.131326949384405, + "grad_norm": 1.0393388271331787, + "learning_rate": 6.407665766915886e-06, + "loss": 0.1147, + "step": 780 + }, + { + "epoch": 2.1450068399452804, + "grad_norm": 1.078455924987793, + "learning_rate": 6.223477044185909e-06, + "loss": 0.1379, + "step": 785 + }, + { + "epoch": 2.158686730506156, + "grad_norm": 1.5677859783172607, + "learning_rate": 6.04127892797311e-06, + "loss": 0.1217, + "step": 790 + }, + { + "epoch": 2.1723666210670314, + "grad_norm": 1.0757791996002197, + "learning_rate": 5.861112742701678e-06, + "loss": 0.1051, + "step": 795 + }, + { + "epoch": 2.186046511627907, + "grad_norm": 0.9902474880218506, + "learning_rate": 5.683019351932775e-06, + "loss": 0.1323, + "step": 800 + }, + { + "epoch": 2.1997264021887823, + "grad_norm": 0.964462161064148, + "learning_rate": 5.507039149096251e-06, + "loss": 0.1072, + "step": 805 + }, + { + "epoch": 2.213406292749658, + "grad_norm": 0.8716784119606018, + "learning_rate": 5.333212048328983e-06, + "loss": 0.1111, + "step": 810 + }, + { + "epoch": 2.2270861833105333, + "grad_norm": 0.9398632645606995, + "learning_rate": 5.161577475421978e-06, + "loss": 0.103, + "step": 815 + }, + { + "epoch": 2.2407660738714092, + "grad_norm": 1.1250271797180176, + "learning_rate": 4.992174358878126e-06, + "loss": 0.1307, + "step": 820 + }, + { + "epoch": 2.2544459644322847, + "grad_norm": 0.8738728165626526, + "learning_rate": 4.82504112108287e-06, + "loss": 0.1054, + "step": 825 + }, + { + "epoch": 2.26812585499316, + "grad_norm": 0.9868120551109314, + "learning_rate": 4.660215669589589e-06, + "loss": 0.0934, + "step": 830 + }, + { + "epoch": 2.2818057455540357, + "grad_norm": 1.1148253679275513, + "learning_rate": 4.497735388521762e-06, + "loss": 0.1151, + "step": 835 + }, + { + "epoch": 2.295485636114911, + "grad_norm": 1.2891764640808105, + "learning_rate": 4.337637130093879e-06, + "loss": 0.1206, + "step": 840 + }, + { + "epoch": 2.3091655266757867, + "grad_norm": 1.0749719142913818, + "learning_rate": 4.179957206252962e-06, + "loss": 0.1074, + "step": 845 + }, + { + "epoch": 2.322845417236662, + "grad_norm": 1.0261924266815186, + "learning_rate": 4.0247313804426455e-06, + "loss": 0.125, + "step": 850 + }, + { + "epoch": 2.3365253077975376, + "grad_norm": 0.8548686504364014, + "learning_rate": 3.871994859491643e-06, + "loss": 0.1112, + "step": 855 + }, + { + "epoch": 2.350205198358413, + "grad_norm": 1.1603178977966309, + "learning_rate": 3.7217822856285087e-06, + "loss": 0.098, + "step": 860 + }, + { + "epoch": 2.3638850889192886, + "grad_norm": 0.9653047323226929, + "learning_rate": 3.574127728624365e-06, + "loss": 0.111, + "step": 865 + }, + { + "epoch": 2.377564979480164, + "grad_norm": 0.9625891447067261, + "learning_rate": 3.429064678065584e-06, + "loss": 0.1053, + "step": 870 + }, + { + "epoch": 2.3912448700410396, + "grad_norm": 1.0057806968688965, + "learning_rate": 3.28662603575796e-06, + "loss": 0.1105, + "step": 875 + }, + { + "epoch": 2.404924760601915, + "grad_norm": 1.095563530921936, + "learning_rate": 3.1468441082642396e-06, + "loss": 0.1116, + "step": 880 + }, + { + "epoch": 2.4186046511627906, + "grad_norm": 0.7796836495399475, + "learning_rate": 3.009750599576698e-06, + "loss": 0.0979, + "step": 885 + }, + { + "epoch": 2.432284541723666, + "grad_norm": 0.8454521894454956, + "learning_rate": 2.8753766039262872e-06, + "loss": 0.092, + "step": 890 + }, + { + "epoch": 2.4459644322845415, + "grad_norm": 0.9276688098907471, + "learning_rate": 2.7437525987301887e-06, + "loss": 0.1184, + "step": 895 + }, + { + "epoch": 2.459644322845417, + "grad_norm": 0.8594542145729065, + "learning_rate": 2.614908437679195e-06, + "loss": 0.1107, + "step": 900 + }, + { + "epoch": 2.473324213406293, + "grad_norm": 1.17416512966156, + "learning_rate": 2.4888733439665895e-06, + "loss": 0.1035, + "step": 905 + }, + { + "epoch": 2.4870041039671684, + "grad_norm": 0.8833194971084595, + "learning_rate": 2.365675903660019e-06, + "loss": 0.0949, + "step": 910 + }, + { + "epoch": 2.500683994528044, + "grad_norm": 1.1180675029754639, + "learning_rate": 2.2453440592178837e-06, + "loss": 0.1106, + "step": 915 + }, + { + "epoch": 2.5143638850889194, + "grad_norm": 0.6940875053405762, + "learning_rate": 2.1279051031516926e-06, + "loss": 0.1007, + "step": 920 + }, + { + "epoch": 2.528043775649795, + "grad_norm": 0.9491432905197144, + "learning_rate": 2.013385671835831e-06, + "loss": 0.1133, + "step": 925 + }, + { + "epoch": 2.5417236662106704, + "grad_norm": 0.767942488193512, + "learning_rate": 1.9018117394661816e-06, + "loss": 0.089, + "step": 930 + }, + { + "epoch": 2.555403556771546, + "grad_norm": 0.7856757044792175, + "learning_rate": 1.7932086121688668e-06, + "loss": 0.0815, + "step": 935 + }, + { + "epoch": 2.5690834473324213, + "grad_norm": 1.089542031288147, + "learning_rate": 1.6876009222605926e-06, + "loss": 0.0948, + "step": 940 + }, + { + "epoch": 2.582763337893297, + "grad_norm": 0.9344882965087891, + "learning_rate": 1.5850126226617611e-06, + "loss": 0.0999, + "step": 945 + }, + { + "epoch": 2.5964432284541723, + "grad_norm": 0.809226930141449, + "learning_rate": 1.4854669814637145e-06, + "loss": 0.1093, + "step": 950 + }, + { + "epoch": 2.610123119015048, + "grad_norm": 0.7643980979919434, + "learning_rate": 1.388986576651276e-06, + "loss": 0.1039, + "step": 955 + }, + { + "epoch": 2.6238030095759233, + "grad_norm": 0.8472519516944885, + "learning_rate": 1.2955932909818403e-06, + "loss": 0.0927, + "step": 960 + }, + { + "epoch": 2.6374829001367988, + "grad_norm": 0.8811158537864685, + "learning_rate": 1.2053083070221326e-06, + "loss": 0.1055, + "step": 965 + }, + { + "epoch": 2.6511627906976747, + "grad_norm": 0.7810266017913818, + "learning_rate": 1.1181521023437751e-06, + "loss": 0.0963, + "step": 970 + }, + { + "epoch": 2.66484268125855, + "grad_norm": 0.7509508728981018, + "learning_rate": 1.034144444878784e-06, + "loss": 0.0972, + "step": 975 + }, + { + "epoch": 2.6785225718194257, + "grad_norm": 0.8772942423820496, + "learning_rate": 9.533043884359616e-07, + "loss": 0.0848, + "step": 980 + }, + { + "epoch": 2.692202462380301, + "grad_norm": 0.667640745639801, + "learning_rate": 8.756502683793366e-07, + "loss": 0.0892, + "step": 985 + }, + { + "epoch": 2.7058823529411766, + "grad_norm": 0.7099531292915344, + "learning_rate": 8.011996974694708e-07, + "loss": 0.0894, + "step": 990 + }, + { + "epoch": 2.719562243502052, + "grad_norm": 0.9716089367866516, + "learning_rate": 7.299695618687357e-07, + "loss": 0.0892, + "step": 995 + }, + { + "epoch": 2.7332421340629276, + "grad_norm": 0.7508392930030823, + "learning_rate": 6.619760173113437e-07, + "loss": 0.0873, + "step": 1000 + }, + { + "epoch": 2.746922024623803, + "grad_norm": 0.7627072930335999, + "learning_rate": 5.972344854390482e-07, + "loss": 0.1053, + "step": 1005 + }, + { + "epoch": 2.7606019151846786, + "grad_norm": 0.8867163062095642, + "learning_rate": 5.357596503033773e-07, + "loss": 0.0963, + "step": 1010 + }, + { + "epoch": 2.774281805745554, + "grad_norm": 1.0944985151290894, + "learning_rate": 4.775654550351194e-07, + "loss": 0.0976, + "step": 1015 + }, + { + "epoch": 2.7879616963064295, + "grad_norm": 0.8190701603889465, + "learning_rate": 4.2266509868188584e-07, + "loss": 0.0919, + "step": 1020 + }, + { + "epoch": 2.801641586867305, + "grad_norm": 0.6996578574180603, + "learning_rate": 3.7107103321443125e-07, + "loss": 0.0952, + "step": 1025 + }, + { + "epoch": 2.8153214774281805, + "grad_norm": 0.70615154504776, + "learning_rate": 3.2279496070241053e-07, + "loss": 0.0818, + "step": 1030 + }, + { + "epoch": 2.829001367989056, + "grad_norm": 0.7731340527534485, + "learning_rate": 2.7784783066023553e-07, + "loss": 0.1011, + "step": 1035 + }, + { + "epoch": 2.8426812585499315, + "grad_norm": 0.941338300704956, + "learning_rate": 2.3623983756359825e-07, + "loss": 0.086, + "step": 1040 + }, + { + "epoch": 2.856361149110807, + "grad_norm": 0.7818951606750488, + "learning_rate": 1.979804185372802e-07, + "loss": 0.0991, + "step": 1045 + }, + { + "epoch": 2.8700410396716824, + "grad_norm": 0.6519285440444946, + "learning_rate": 1.6307825121469165e-07, + "loss": 0.0693, + "step": 1050 + }, + { + "epoch": 2.883720930232558, + "grad_norm": 0.7095218300819397, + "learning_rate": 1.3154125176970732e-07, + "loss": 0.1028, + "step": 1055 + }, + { + "epoch": 2.8974008207934334, + "grad_norm": 0.8929292559623718, + "learning_rate": 1.0337657312119441e-07, + "loss": 0.0997, + "step": 1060 + }, + { + "epoch": 2.911080711354309, + "grad_norm": 0.8118662238121033, + "learning_rate": 7.859060331065371e-08, + "loss": 0.0883, + "step": 1065 + }, + { + "epoch": 2.924760601915185, + "grad_norm": 0.7753264307975769, + "learning_rate": 5.7188964053345174e-08, + "loss": 0.0757, + "step": 1070 + }, + { + "epoch": 2.9384404924760603, + "grad_norm": 0.6951804757118225, + "learning_rate": 3.9176509463227926e-08, + "loss": 0.0934, + "step": 1075 + }, + { + "epoch": 2.952120383036936, + "grad_norm": 0.7601253390312195, + "learning_rate": 2.4557324951994253e-08, + "loss": 0.0944, + "step": 1080 + }, + { + "epoch": 2.9658002735978113, + "grad_norm": 0.7850967049598694, + "learning_rate": 1.3334726302454136e-08, + "loss": 0.0858, + "step": 1085 + }, + { + "epoch": 2.9794801641586868, + "grad_norm": 0.7441933155059814, + "learning_rate": 5.511258916485185e-09, + "loss": 0.0902, + "step": 1090 + }, + { + "epoch": 2.9931600547195623, + "grad_norm": 0.6794942617416382, + "learning_rate": 1.088697237709435e-09, + "loss": 0.0769, + "step": 1095 + }, + { + "epoch": 3.0, + "step": 1098, + "total_flos": 1.3890291165057516e+18, + "train_loss": 0.433336251311832, + "train_runtime": 1099.7913, + "train_samples_per_second": 31.899, + "train_steps_per_second": 0.998 + } + ], + "logging_steps": 5, + "max_steps": 1098, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3890291165057516e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fb5d9ba7d537fa2386632d9c4a82ad298767da3 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/3_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54b83911c5a58280cd9cee06d70a88744c6fbafd272302564dafe1be7fad055 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7ea75c3490f8e211f7fd4a0c6e4246c2208b89d1 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 4_128_e3_3e-5 + results: [] +--- + + + +# 4_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b38cb6291aa1d4adb528fd85601e9edd7ccd89c4 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "up_proj", + "down_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c9f0637bbb92609454d3d7cef6f0ac7389af40f3 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40606ede977f23b6eb82b861d81ed2ff9c406248f2c1d696dfa0dbea9962d4c9 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..64dd873ee835434556202cf3159aa39c8ae08bb7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2667695529522627e+18, + "train_loss": 0.42366581120301244, + "train_runtime": 1008.4374, + "train_samples": 10720, + "train_samples_per_second": 31.891, + "train_steps_per_second": 0.997 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..64dd873ee835434556202cf3159aa39c8ae08bb7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2667695529522627e+18, + "train_loss": 0.42366581120301244, + "train_runtime": 1008.4374, + "train_samples": 10720, + "train_samples_per_second": 31.891, + "train_steps_per_second": 0.997 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6f0efd6a230416a295c5a40261199bf24e1009ad --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1450 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1005, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014925373134328358, + "grad_norm": 0.7206871509552002, + "learning_rate": 2.3529411764705885e-06, + "loss": 1.6233, + "step": 5 + }, + { + "epoch": 0.029850746268656716, + "grad_norm": 0.5696051716804504, + "learning_rate": 5.294117647058824e-06, + "loss": 1.5922, + "step": 10 + }, + { + "epoch": 0.04477611940298507, + "grad_norm": 0.550079345703125, + "learning_rate": 8.23529411764706e-06, + "loss": 1.5673, + "step": 15 + }, + { + "epoch": 0.05970149253731343, + "grad_norm": 0.5587111711502075, + "learning_rate": 1.1176470588235295e-05, + "loss": 1.5674, + "step": 20 + }, + { + "epoch": 0.07462686567164178, + "grad_norm": 0.47961002588272095, + "learning_rate": 1.411764705882353e-05, + "loss": 1.4776, + "step": 25 + }, + { + "epoch": 0.08955223880597014, + "grad_norm": 0.5240439176559448, + "learning_rate": 1.7058823529411763e-05, + "loss": 1.4966, + "step": 30 + }, + { + "epoch": 0.1044776119402985, + "grad_norm": 0.4861885607242584, + "learning_rate": 1.9999999999999998e-05, + "loss": 1.5044, + "step": 35 + }, + { + "epoch": 0.11940298507462686, + "grad_norm": 0.4439212679862976, + "learning_rate": 2.2941176470588233e-05, + "loss": 1.5095, + "step": 40 + }, + { + "epoch": 0.13432835820895522, + "grad_norm": 0.5109625458717346, + "learning_rate": 2.5882352941176472e-05, + "loss": 1.3902, + "step": 45 + }, + { + "epoch": 0.14925373134328357, + "grad_norm": 0.5104894638061523, + "learning_rate": 2.8823529411764707e-05, + "loss": 1.368, + "step": 50 + }, + { + "epoch": 0.16417910447761194, + "grad_norm": 0.5414615273475647, + "learning_rate": 2.9999268013221688e-05, + "loss": 1.3333, + "step": 55 + }, + { + "epoch": 0.1791044776119403, + "grad_norm": 0.5404403209686279, + "learning_rate": 2.9994795019399927e-05, + "loss": 1.3357, + "step": 60 + }, + { + "epoch": 0.19402985074626866, + "grad_norm": 0.5749793648719788, + "learning_rate": 2.9986256902246587e-05, + "loss": 1.3004, + "step": 65 + }, + { + "epoch": 0.208955223880597, + "grad_norm": 0.713896632194519, + "learning_rate": 2.9973655976464824e-05, + "loss": 1.2886, + "step": 70 + }, + { + "epoch": 0.22388059701492538, + "grad_norm": 0.6050195097923279, + "learning_rate": 2.995699565819452e-05, + "loss": 1.2167, + "step": 75 + }, + { + "epoch": 0.23880597014925373, + "grad_norm": 0.64478600025177, + "learning_rate": 2.993628046408618e-05, + "loss": 1.1909, + "step": 80 + }, + { + "epoch": 0.2537313432835821, + "grad_norm": 0.7003644108772278, + "learning_rate": 2.991151601007646e-05, + "loss": 1.2064, + "step": 85 + }, + { + "epoch": 0.26865671641791045, + "grad_norm": 0.8628835082054138, + "learning_rate": 2.9882709009865653e-05, + "loss": 1.1438, + "step": 90 + }, + { + "epoch": 0.2835820895522388, + "grad_norm": 0.9073294997215271, + "learning_rate": 2.98498672730976e-05, + "loss": 1.1514, + "step": 95 + }, + { + "epoch": 0.29850746268656714, + "grad_norm": 0.7668473124504089, + "learning_rate": 2.9812999703242502e-05, + "loss": 1.1638, + "step": 100 + }, + { + "epoch": 0.31343283582089554, + "grad_norm": 0.8440659046173096, + "learning_rate": 2.977211629518312e-05, + "loss": 1.093, + "step": 105 + }, + { + "epoch": 0.3283582089552239, + "grad_norm": 0.9054542779922485, + "learning_rate": 2.9727228132505178e-05, + "loss": 1.149, + "step": 110 + }, + { + "epoch": 0.34328358208955223, + "grad_norm": 0.8470582962036133, + "learning_rate": 2.967834738449256e-05, + "loss": 1.1015, + "step": 115 + }, + { + "epoch": 0.3582089552238806, + "grad_norm": 0.8039938807487488, + "learning_rate": 2.9625487302828198e-05, + "loss": 1.0651, + "step": 120 + }, + { + "epoch": 0.373134328358209, + "grad_norm": 0.8265600800514221, + "learning_rate": 2.956866221800151e-05, + "loss": 1.0129, + "step": 125 + }, + { + "epoch": 0.3880597014925373, + "grad_norm": 0.8602685332298279, + "learning_rate": 2.9507887535423357e-05, + "loss": 0.9838, + "step": 130 + }, + { + "epoch": 0.40298507462686567, + "grad_norm": 0.7970941662788391, + "learning_rate": 2.944317973124962e-05, + "loss": 0.9368, + "step": 135 + }, + { + "epoch": 0.417910447761194, + "grad_norm": 0.9532618522644043, + "learning_rate": 2.937455634791447e-05, + "loss": 0.9282, + "step": 140 + }, + { + "epoch": 0.43283582089552236, + "grad_norm": 0.9605815410614014, + "learning_rate": 2.9302035989374565e-05, + "loss": 0.9164, + "step": 145 + }, + { + "epoch": 0.44776119402985076, + "grad_norm": 0.9454911351203918, + "learning_rate": 2.9225638316065483e-05, + "loss": 0.8597, + "step": 150 + }, + { + "epoch": 0.4626865671641791, + "grad_norm": 0.9787341356277466, + "learning_rate": 2.9145384039571743e-05, + "loss": 0.9085, + "step": 155 + }, + { + "epoch": 0.47761194029850745, + "grad_norm": 1.0200845003128052, + "learning_rate": 2.9061294917011817e-05, + "loss": 0.8665, + "step": 160 + }, + { + "epoch": 0.4925373134328358, + "grad_norm": 1.098917841911316, + "learning_rate": 2.897339374513975e-05, + "loss": 0.8558, + "step": 165 + }, + { + "epoch": 0.5074626865671642, + "grad_norm": 0.93587726354599, + "learning_rate": 2.888170435416491e-05, + "loss": 0.8659, + "step": 170 + }, + { + "epoch": 0.5223880597014925, + "grad_norm": 1.2869884967803955, + "learning_rate": 2.878625160129155e-05, + "loss": 0.8038, + "step": 175 + }, + { + "epoch": 0.5373134328358209, + "grad_norm": 1.0285530090332031, + "learning_rate": 2.8687061363979963e-05, + "loss": 0.8326, + "step": 180 + }, + { + "epoch": 0.5522388059701493, + "grad_norm": 1.1656824350357056, + "learning_rate": 2.858416053293105e-05, + "loss": 0.8315, + "step": 185 + }, + { + "epoch": 0.5671641791044776, + "grad_norm": 1.1553387641906738, + "learning_rate": 2.84775770047962e-05, + "loss": 0.7681, + "step": 190 + }, + { + "epoch": 0.582089552238806, + "grad_norm": 1.0321202278137207, + "learning_rate": 2.8367339674614402e-05, + "loss": 0.7505, + "step": 195 + }, + { + "epoch": 0.5970149253731343, + "grad_norm": 1.0478719472885132, + "learning_rate": 2.825347842797879e-05, + "loss": 0.7174, + "step": 200 + }, + { + "epoch": 0.6119402985074627, + "grad_norm": 1.1063668727874756, + "learning_rate": 2.8136024132934552e-05, + "loss": 0.7261, + "step": 205 + }, + { + "epoch": 0.6268656716417911, + "grad_norm": 1.212662935256958, + "learning_rate": 2.8015008631610545e-05, + "loss": 0.7318, + "step": 210 + }, + { + "epoch": 0.6417910447761194, + "grad_norm": 1.2168813943862915, + "learning_rate": 2.789046473158682e-05, + "loss": 0.7245, + "step": 215 + }, + { + "epoch": 0.6567164179104478, + "grad_norm": 1.0795903205871582, + "learning_rate": 2.7762426197000404e-05, + "loss": 0.682, + "step": 220 + }, + { + "epoch": 0.6716417910447762, + "grad_norm": 1.124311923980713, + "learning_rate": 2.763092773939177e-05, + "loss": 0.6683, + "step": 225 + }, + { + "epoch": 0.6865671641791045, + "grad_norm": 1.148598313331604, + "learning_rate": 2.749600500829448e-05, + "loss": 0.7251, + "step": 230 + }, + { + "epoch": 0.7014925373134329, + "grad_norm": 1.077394723892212, + "learning_rate": 2.7357694581570475e-05, + "loss": 0.6908, + "step": 235 + }, + { + "epoch": 0.7164179104477612, + "grad_norm": 1.2210252285003662, + "learning_rate": 2.7216033955493756e-05, + "loss": 0.6278, + "step": 240 + }, + { + "epoch": 0.7313432835820896, + "grad_norm": 1.246825098991394, + "learning_rate": 2.7071061534585064e-05, + "loss": 0.6119, + "step": 245 + }, + { + "epoch": 0.746268656716418, + "grad_norm": 1.297675371170044, + "learning_rate": 2.6922816621200302e-05, + "loss": 0.5606, + "step": 250 + }, + { + "epoch": 0.7611940298507462, + "grad_norm": 1.1701496839523315, + "learning_rate": 2.6771339404875602e-05, + "loss": 0.6374, + "step": 255 + }, + { + "epoch": 0.7761194029850746, + "grad_norm": 1.24527108669281, + "learning_rate": 2.6616670951431842e-05, + "loss": 0.5901, + "step": 260 + }, + { + "epoch": 0.7910447761194029, + "grad_norm": 1.378541350364685, + "learning_rate": 2.645885319184159e-05, + "loss": 0.5911, + "step": 265 + }, + { + "epoch": 0.8059701492537313, + "grad_norm": 1.1480319499969482, + "learning_rate": 2.6297928910861546e-05, + "loss": 0.5937, + "step": 270 + }, + { + "epoch": 0.8208955223880597, + "grad_norm": 1.2642405033111572, + "learning_rate": 2.6133941735433496e-05, + "loss": 0.5644, + "step": 275 + }, + { + "epoch": 0.835820895522388, + "grad_norm": 1.4720721244812012, + "learning_rate": 2.596693612285691e-05, + "loss": 0.5109, + "step": 280 + }, + { + "epoch": 0.8507462686567164, + "grad_norm": 1.2557884454727173, + "learning_rate": 2.5796957348736522e-05, + "loss": 0.5366, + "step": 285 + }, + { + "epoch": 0.8656716417910447, + "grad_norm": 1.224118709564209, + "learning_rate": 2.5624051494707967e-05, + "loss": 0.5404, + "step": 290 + }, + { + "epoch": 0.8805970149253731, + "grad_norm": 1.6599620580673218, + "learning_rate": 2.5448265435944957e-05, + "loss": 0.4514, + "step": 295 + }, + { + "epoch": 0.8955223880597015, + "grad_norm": 1.2880064249038696, + "learning_rate": 2.5269646828451323e-05, + "loss": 0.5279, + "step": 300 + }, + { + "epoch": 0.9104477611940298, + "grad_norm": 1.1442257165908813, + "learning_rate": 2.5088244096141355e-05, + "loss": 0.5115, + "step": 305 + }, + { + "epoch": 0.9253731343283582, + "grad_norm": 1.2725549936294556, + "learning_rate": 2.490410641771196e-05, + "loss": 0.4831, + "step": 310 + }, + { + "epoch": 0.9402985074626866, + "grad_norm": 1.186773419380188, + "learning_rate": 2.4717283713310224e-05, + "loss": 0.4825, + "step": 315 + }, + { + "epoch": 0.9552238805970149, + "grad_norm": 1.2619786262512207, + "learning_rate": 2.4527826630999922e-05, + "loss": 0.486, + "step": 320 + }, + { + "epoch": 0.9701492537313433, + "grad_norm": 1.5315459966659546, + "learning_rate": 2.4335786533030736e-05, + "loss": 0.4577, + "step": 325 + }, + { + "epoch": 0.9850746268656716, + "grad_norm": 1.3151711225509644, + "learning_rate": 2.414121548191381e-05, + "loss": 0.4807, + "step": 330 + }, + { + "epoch": 1.0, + "grad_norm": 1.1066868305206299, + "learning_rate": 2.39441662263075e-05, + "loss": 0.4412, + "step": 335 + }, + { + "epoch": 1.0149253731343284, + "grad_norm": 1.288899302482605, + "learning_rate": 2.374469218671708e-05, + "loss": 0.4067, + "step": 340 + }, + { + "epoch": 1.0298507462686568, + "grad_norm": 1.2430055141448975, + "learning_rate": 2.3542847441012325e-05, + "loss": 0.3849, + "step": 345 + }, + { + "epoch": 1.044776119402985, + "grad_norm": 1.3153514862060547, + "learning_rate": 2.333868670976688e-05, + "loss": 0.3843, + "step": 350 + }, + { + "epoch": 1.0597014925373134, + "grad_norm": 1.454361915588379, + "learning_rate": 2.3132265341423382e-05, + "loss": 0.3399, + "step": 355 + }, + { + "epoch": 1.0746268656716418, + "grad_norm": 1.283898949623108, + "learning_rate": 2.292363929728836e-05, + "loss": 0.3907, + "step": 360 + }, + { + "epoch": 1.0895522388059702, + "grad_norm": 1.3252501487731934, + "learning_rate": 2.2712865136361037e-05, + "loss": 0.4114, + "step": 365 + }, + { + "epoch": 1.1044776119402986, + "grad_norm": 1.400173544883728, + "learning_rate": 2.25e-05, + "loss": 0.3433, + "step": 370 + }, + { + "epoch": 1.1194029850746268, + "grad_norm": 1.1993721723556519, + "learning_rate": 2.2285101596432084e-05, + "loss": 0.3636, + "step": 375 + }, + { + "epoch": 1.1343283582089552, + "grad_norm": 1.2795064449310303, + "learning_rate": 2.2068228185107524e-05, + "loss": 0.348, + "step": 380 + }, + { + "epoch": 1.1492537313432836, + "grad_norm": 1.248583436012268, + "learning_rate": 2.1849438560905697e-05, + "loss": 0.3301, + "step": 385 + }, + { + "epoch": 1.164179104477612, + "grad_norm": 1.4759471416473389, + "learning_rate": 2.162879203819568e-05, + "loss": 0.3341, + "step": 390 + }, + { + "epoch": 1.1791044776119404, + "grad_norm": 1.3039376735687256, + "learning_rate": 2.1406348434755994e-05, + "loss": 0.3628, + "step": 395 + }, + { + "epoch": 1.1940298507462686, + "grad_norm": 1.615215539932251, + "learning_rate": 2.118216805555788e-05, + "loss": 0.333, + "step": 400 + }, + { + "epoch": 1.208955223880597, + "grad_norm": 1.2891199588775635, + "learning_rate": 2.0956311676416464e-05, + "loss": 0.351, + "step": 405 + }, + { + "epoch": 1.2238805970149254, + "grad_norm": 1.2768980264663696, + "learning_rate": 2.0728840527514294e-05, + "loss": 0.3004, + "step": 410 + }, + { + "epoch": 1.2388059701492538, + "grad_norm": 1.1712465286254883, + "learning_rate": 2.0499816276801724e-05, + "loss": 0.3164, + "step": 415 + }, + { + "epoch": 1.2537313432835822, + "grad_norm": 1.28341543674469, + "learning_rate": 2.0269301013278555e-05, + "loss": 0.2988, + "step": 420 + }, + { + "epoch": 1.2686567164179103, + "grad_norm": 1.2833764553070068, + "learning_rate": 2.0037357230161587e-05, + "loss": 0.3113, + "step": 425 + }, + { + "epoch": 1.2835820895522387, + "grad_norm": 1.1727735996246338, + "learning_rate": 1.9804047807942564e-05, + "loss": 0.3125, + "step": 430 + }, + { + "epoch": 1.2985074626865671, + "grad_norm": 1.0797169208526611, + "learning_rate": 1.956943599734112e-05, + "loss": 0.2802, + "step": 435 + }, + { + "epoch": 1.3134328358208955, + "grad_norm": 1.3341853618621826, + "learning_rate": 1.9333585402157365e-05, + "loss": 0.3221, + "step": 440 + }, + { + "epoch": 1.328358208955224, + "grad_norm": 1.307535171508789, + "learning_rate": 1.9096559962028746e-05, + "loss": 0.2553, + "step": 445 + }, + { + "epoch": 1.3432835820895521, + "grad_norm": 1.11501145362854, + "learning_rate": 1.88584239350959e-05, + "loss": 0.2692, + "step": 450 + }, + { + "epoch": 1.3582089552238805, + "grad_norm": 1.3290529251098633, + "learning_rate": 1.861924188058205e-05, + "loss": 0.3087, + "step": 455 + }, + { + "epoch": 1.373134328358209, + "grad_norm": 1.352819561958313, + "learning_rate": 1.8379078641290923e-05, + "loss": 0.267, + "step": 460 + }, + { + "epoch": 1.3880597014925373, + "grad_norm": 1.2868565320968628, + "learning_rate": 1.8137999326027696e-05, + "loss": 0.3065, + "step": 465 + }, + { + "epoch": 1.4029850746268657, + "grad_norm": 1.4677659273147583, + "learning_rate": 1.7896069291947827e-05, + "loss": 0.2734, + "step": 470 + }, + { + "epoch": 1.417910447761194, + "grad_norm": 1.4118502140045166, + "learning_rate": 1.7653354126838593e-05, + "loss": 0.2456, + "step": 475 + }, + { + "epoch": 1.4328358208955223, + "grad_norm": 1.6239250898361206, + "learning_rate": 1.7409919631338124e-05, + "loss": 0.3099, + "step": 480 + }, + { + "epoch": 1.4477611940298507, + "grad_norm": 1.228977084159851, + "learning_rate": 1.7165831801096635e-05, + "loss": 0.2427, + "step": 485 + }, + { + "epoch": 1.462686567164179, + "grad_norm": 1.7017098665237427, + "learning_rate": 1.6921156808884904e-05, + "loss": 0.2851, + "step": 490 + }, + { + "epoch": 1.4776119402985075, + "grad_norm": 1.2900630235671997, + "learning_rate": 1.6675960986654675e-05, + "loss": 0.2637, + "step": 495 + }, + { + "epoch": 1.4925373134328357, + "grad_norm": 1.2965264320373535, + "learning_rate": 1.6430310807555884e-05, + "loss": 0.2162, + "step": 500 + }, + { + "epoch": 1.5074626865671643, + "grad_norm": 1.44374680519104, + "learning_rate": 1.618427286791568e-05, + "loss": 0.2351, + "step": 505 + }, + { + "epoch": 1.5223880597014925, + "grad_norm": 1.277514100074768, + "learning_rate": 1.593791386918396e-05, + "loss": 0.2347, + "step": 510 + }, + { + "epoch": 1.537313432835821, + "grad_norm": 1.2740216255187988, + "learning_rate": 1.5691300599850495e-05, + "loss": 0.2221, + "step": 515 + }, + { + "epoch": 1.5522388059701493, + "grad_norm": 1.3130953311920166, + "learning_rate": 1.5444499917338398e-05, + "loss": 0.2445, + "step": 520 + }, + { + "epoch": 1.5671641791044775, + "grad_norm": 1.200822114944458, + "learning_rate": 1.5197578729878915e-05, + "loss": 0.2283, + "step": 525 + }, + { + "epoch": 1.582089552238806, + "grad_norm": 1.1942511796951294, + "learning_rate": 1.4950603978372467e-05, + "loss": 0.2334, + "step": 530 + }, + { + "epoch": 1.5970149253731343, + "grad_norm": 1.2476563453674316, + "learning_rate": 1.4703642618240806e-05, + "loss": 0.2233, + "step": 535 + }, + { + "epoch": 1.6119402985074627, + "grad_norm": 1.1321837902069092, + "learning_rate": 1.4456761601275254e-05, + "loss": 0.2401, + "step": 540 + }, + { + "epoch": 1.626865671641791, + "grad_norm": 1.2564575672149658, + "learning_rate": 1.4210027857485932e-05, + "loss": 0.1962, + "step": 545 + }, + { + "epoch": 1.6417910447761193, + "grad_norm": 1.1505788564682007, + "learning_rate": 1.3963508276956832e-05, + "loss": 0.2291, + "step": 550 + }, + { + "epoch": 1.6567164179104479, + "grad_norm": 1.237631916999817, + "learning_rate": 1.371726969171182e-05, + "loss": 0.1994, + "step": 555 + }, + { + "epoch": 1.671641791044776, + "grad_norm": 1.162996530532837, + "learning_rate": 1.34713788575963e-05, + "loss": 0.1901, + "step": 560 + }, + { + "epoch": 1.6865671641791045, + "grad_norm": 1.42928147315979, + "learning_rate": 1.3225902436179515e-05, + "loss": 0.2164, + "step": 565 + }, + { + "epoch": 1.7014925373134329, + "grad_norm": 1.2522096633911133, + "learning_rate": 1.2980906976682508e-05, + "loss": 0.2078, + "step": 570 + }, + { + "epoch": 1.716417910447761, + "grad_norm": 1.4950181245803833, + "learning_rate": 1.2736458897936432e-05, + "loss": 0.1914, + "step": 575 + }, + { + "epoch": 1.7313432835820897, + "grad_norm": 1.3392897844314575, + "learning_rate": 1.2492624470376253e-05, + "loss": 0.2036, + "step": 580 + }, + { + "epoch": 1.7462686567164178, + "grad_norm": 1.2627729177474976, + "learning_rate": 1.22494697980747e-05, + "loss": 0.2199, + "step": 585 + }, + { + "epoch": 1.7611940298507462, + "grad_norm": 1.3076231479644775, + "learning_rate": 1.20070608008213e-05, + "loss": 0.2063, + "step": 590 + }, + { + "epoch": 1.7761194029850746, + "grad_norm": 1.2204923629760742, + "learning_rate": 1.1765463196251349e-05, + "loss": 0.208, + "step": 595 + }, + { + "epoch": 1.7910447761194028, + "grad_norm": 1.260024905204773, + "learning_rate": 1.1524742482029728e-05, + "loss": 0.1758, + "step": 600 + }, + { + "epoch": 1.8059701492537314, + "grad_norm": 1.0565294027328491, + "learning_rate": 1.1284963918094346e-05, + "loss": 0.1946, + "step": 605 + }, + { + "epoch": 1.8208955223880596, + "grad_norm": 1.1785441637039185, + "learning_rate": 1.104619250896399e-05, + "loss": 0.188, + "step": 610 + }, + { + "epoch": 1.835820895522388, + "grad_norm": 1.2795275449752808, + "learning_rate": 1.0808492986115476e-05, + "loss": 0.1654, + "step": 615 + }, + { + "epoch": 1.8507462686567164, + "grad_norm": 1.4200060367584229, + "learning_rate": 1.0571929790434792e-05, + "loss": 0.1796, + "step": 620 + }, + { + "epoch": 1.8656716417910446, + "grad_norm": 1.2729883193969727, + "learning_rate": 1.0336567054747033e-05, + "loss": 0.2029, + "step": 625 + }, + { + "epoch": 1.8805970149253732, + "grad_norm": 1.2387042045593262, + "learning_rate": 1.0102468586429808e-05, + "loss": 0.182, + "step": 630 + }, + { + "epoch": 1.8955223880597014, + "grad_norm": 1.0882388353347778, + "learning_rate": 9.86969785011497e-06, + "loss": 0.172, + "step": 635 + }, + { + "epoch": 1.9104477611940298, + "grad_norm": 1.0938584804534912, + "learning_rate": 9.638317950483167e-06, + "loss": 0.1672, + "step": 640 + }, + { + "epoch": 1.9253731343283582, + "grad_norm": 0.9716625213623047, + "learning_rate": 9.408391615156023e-06, + "loss": 0.1441, + "step": 645 + }, + { + "epoch": 1.9402985074626866, + "grad_norm": 1.4078278541564941, + "learning_rate": 9.179981177690566e-06, + "loss": 0.1532, + "step": 650 + }, + { + "epoch": 1.955223880597015, + "grad_norm": 1.1601283550262451, + "learning_rate": 8.953148560680419e-06, + "loss": 0.1746, + "step": 655 + }, + { + "epoch": 1.9701492537313432, + "grad_norm": 1.3825539350509644, + "learning_rate": 8.727955258968462e-06, + "loss": 0.1686, + "step": 660 + }, + { + "epoch": 1.9850746268656716, + "grad_norm": 1.2583445310592651, + "learning_rate": 8.504462322975442e-06, + "loss": 0.158, + "step": 665 + }, + { + "epoch": 2.0, + "grad_norm": 1.246358036994934, + "learning_rate": 8.282730342149059e-06, + "loss": 0.1475, + "step": 670 + }, + { + "epoch": 2.014925373134328, + "grad_norm": 1.0723435878753662, + "learning_rate": 8.062819428538009e-06, + "loss": 0.121, + "step": 675 + }, + { + "epoch": 2.029850746268657, + "grad_norm": 1.0868420600891113, + "learning_rate": 7.844789200495517e-06, + "loss": 0.1154, + "step": 680 + }, + { + "epoch": 2.044776119402985, + "grad_norm": 1.0777685642242432, + "learning_rate": 7.628698766516625e-06, + "loss": 0.1152, + "step": 685 + }, + { + "epoch": 2.0597014925373136, + "grad_norm": 0.8768631815910339, + "learning_rate": 7.414606709213735e-06, + "loss": 0.1114, + "step": 690 + }, + { + "epoch": 2.074626865671642, + "grad_norm": 0.9294590950012207, + "learning_rate": 7.202571069434772e-06, + "loss": 0.113, + "step": 695 + }, + { + "epoch": 2.08955223880597, + "grad_norm": 1.1497730016708374, + "learning_rate": 6.992649330528146e-06, + "loss": 0.1239, + "step": 700 + }, + { + "epoch": 2.1044776119402986, + "grad_norm": 1.0253499746322632, + "learning_rate": 6.78489840275887e-06, + "loss": 0.1133, + "step": 705 + }, + { + "epoch": 2.1194029850746268, + "grad_norm": 0.9645900130271912, + "learning_rate": 6.579374607880116e-06, + "loss": 0.1115, + "step": 710 + }, + { + "epoch": 2.1343283582089554, + "grad_norm": 1.2921913862228394, + "learning_rate": 6.376133663864196e-06, + "loss": 0.1138, + "step": 715 + }, + { + "epoch": 2.1492537313432836, + "grad_norm": 1.0838154554367065, + "learning_rate": 6.175230669797306e-06, + "loss": 0.1223, + "step": 720 + }, + { + "epoch": 2.1641791044776117, + "grad_norm": 1.3658056259155273, + "learning_rate": 5.976720090942066e-06, + "loss": 0.1235, + "step": 725 + }, + { + "epoch": 2.1791044776119404, + "grad_norm": 1.0306756496429443, + "learning_rate": 5.780655743971844e-06, + "loss": 0.1068, + "step": 730 + }, + { + "epoch": 2.1940298507462686, + "grad_norm": 1.1932945251464844, + "learning_rate": 5.587090782380912e-06, + "loss": 0.1092, + "step": 735 + }, + { + "epoch": 2.208955223880597, + "grad_norm": 0.8893875479698181, + "learning_rate": 5.3960776820744415e-06, + "loss": 0.1219, + "step": 740 + }, + { + "epoch": 2.2238805970149254, + "grad_norm": 0.9615591764450073, + "learning_rate": 5.207668227142178e-06, + "loss": 0.0971, + "step": 745 + }, + { + "epoch": 2.2388059701492535, + "grad_norm": 1.0428558588027954, + "learning_rate": 5.021913495819593e-06, + "loss": 0.1128, + "step": 750 + }, + { + "epoch": 2.253731343283582, + "grad_norm": 0.8330202698707581, + "learning_rate": 4.838863846640524e-06, + "loss": 0.0897, + "step": 755 + }, + { + "epoch": 2.2686567164179103, + "grad_norm": 0.912348210811615, + "learning_rate": 4.6585689047848264e-06, + "loss": 0.1139, + "step": 760 + }, + { + "epoch": 2.283582089552239, + "grad_norm": 1.0514146089553833, + "learning_rate": 4.481077548624871e-06, + "loss": 0.1048, + "step": 765 + }, + { + "epoch": 2.298507462686567, + "grad_norm": 0.9770619869232178, + "learning_rate": 4.306437896474523e-06, + "loss": 0.1165, + "step": 770 + }, + { + "epoch": 2.3134328358208958, + "grad_norm": 0.8356221914291382, + "learning_rate": 4.134697293544158e-06, + "loss": 0.1169, + "step": 775 + }, + { + "epoch": 2.328358208955224, + "grad_norm": 0.9680743217468262, + "learning_rate": 3.965902299105245e-06, + "loss": 0.1058, + "step": 780 + }, + { + "epoch": 2.343283582089552, + "grad_norm": 1.2771291732788086, + "learning_rate": 3.8000986738680245e-06, + "loss": 0.1082, + "step": 785 + }, + { + "epoch": 2.3582089552238807, + "grad_norm": 0.9049290418624878, + "learning_rate": 3.637331367575698e-06, + "loss": 0.104, + "step": 790 + }, + { + "epoch": 2.373134328358209, + "grad_norm": 0.9387826323509216, + "learning_rate": 3.4776445068184365e-06, + "loss": 0.1033, + "step": 795 + }, + { + "epoch": 2.388059701492537, + "grad_norm": 0.7794522643089294, + "learning_rate": 3.32108138307054e-06, + "loss": 0.098, + "step": 800 + }, + { + "epoch": 2.4029850746268657, + "grad_norm": 0.7051690816879272, + "learning_rate": 3.1676844409540607e-06, + "loss": 0.0915, + "step": 805 + }, + { + "epoch": 2.417910447761194, + "grad_norm": 0.7928000688552856, + "learning_rate": 3.017495266731942e-06, + "loss": 0.0944, + "step": 810 + }, + { + "epoch": 2.4328358208955225, + "grad_norm": 0.9799296259880066, + "learning_rate": 2.8705545770338758e-06, + "loss": 0.1045, + "step": 815 + }, + { + "epoch": 2.4477611940298507, + "grad_norm": 0.8028646111488342, + "learning_rate": 2.7269022078179638e-06, + "loss": 0.0999, + "step": 820 + }, + { + "epoch": 2.4626865671641793, + "grad_norm": 0.789699375629425, + "learning_rate": 2.5865771035710777e-06, + "loss": 0.0886, + "step": 825 + }, + { + "epoch": 2.4776119402985075, + "grad_norm": 0.782242476940155, + "learning_rate": 2.449617306750913e-06, + "loss": 0.0994, + "step": 830 + }, + { + "epoch": 2.4925373134328357, + "grad_norm": 0.9552667737007141, + "learning_rate": 2.3160599474726073e-06, + "loss": 0.0852, + "step": 835 + }, + { + "epoch": 2.5074626865671643, + "grad_norm": 0.930198073387146, + "learning_rate": 2.1859412334426853e-06, + "loss": 0.0829, + "step": 840 + }, + { + "epoch": 2.5223880597014925, + "grad_norm": 1.0534543991088867, + "learning_rate": 2.0592964401430377e-06, + "loss": 0.1052, + "step": 845 + }, + { + "epoch": 2.5373134328358207, + "grad_norm": 0.8891270160675049, + "learning_rate": 1.936159901267682e-06, + "loss": 0.0907, + "step": 850 + }, + { + "epoch": 2.5522388059701493, + "grad_norm": 0.8308908343315125, + "learning_rate": 1.8165649994148203e-06, + "loss": 0.0979, + "step": 855 + }, + { + "epoch": 2.5671641791044775, + "grad_norm": 0.969200849533081, + "learning_rate": 1.7005441570367164e-06, + "loss": 0.0982, + "step": 860 + }, + { + "epoch": 2.582089552238806, + "grad_norm": 0.8431044816970825, + "learning_rate": 1.5881288276499211e-06, + "loss": 0.086, + "step": 865 + }, + { + "epoch": 2.5970149253731343, + "grad_norm": 0.6324397325515747, + "learning_rate": 1.4793494873081504e-06, + "loss": 0.0912, + "step": 870 + }, + { + "epoch": 2.611940298507463, + "grad_norm": 0.7684558629989624, + "learning_rate": 1.374235626340128e-06, + "loss": 0.1026, + "step": 875 + }, + { + "epoch": 2.626865671641791, + "grad_norm": 0.7924402356147766, + "learning_rate": 1.2728157413547232e-06, + "loss": 0.0907, + "step": 880 + }, + { + "epoch": 2.6417910447761193, + "grad_norm": 0.7632259726524353, + "learning_rate": 1.1751173275154403e-06, + "loss": 0.0906, + "step": 885 + }, + { + "epoch": 2.656716417910448, + "grad_norm": 0.8474340438842773, + "learning_rate": 1.0811668710864098e-06, + "loss": 0.0873, + "step": 890 + }, + { + "epoch": 2.671641791044776, + "grad_norm": 0.8675773739814758, + "learning_rate": 9.909898422519198e-07, + "loss": 0.1048, + "step": 895 + }, + { + "epoch": 2.6865671641791042, + "grad_norm": 0.6585366725921631, + "learning_rate": 9.046106882113753e-07, + "loss": 0.0747, + "step": 900 + }, + { + "epoch": 2.701492537313433, + "grad_norm": 0.940150260925293, + "learning_rate": 8.220528265516125e-07, + "loss": 0.0869, + "step": 905 + }, + { + "epoch": 2.716417910447761, + "grad_norm": 0.7632895112037659, + "learning_rate": 7.433386388983343e-07, + "loss": 0.0784, + "step": 910 + }, + { + "epoch": 2.7313432835820897, + "grad_norm": 0.6961763501167297, + "learning_rate": 6.684894648484069e-07, + "loss": 0.0832, + "step": 915 + }, + { + "epoch": 2.746268656716418, + "grad_norm": 0.7490938305854797, + "learning_rate": 5.975255961846343e-07, + "loss": 0.1003, + "step": 920 + }, + { + "epoch": 2.7611940298507465, + "grad_norm": 0.7382441759109497, + "learning_rate": 5.304662713746205e-07, + "loss": 0.0926, + "step": 925 + }, + { + "epoch": 2.7761194029850746, + "grad_norm": 0.7356705069541931, + "learning_rate": 4.6732967035517326e-07, + "loss": 0.0914, + "step": 930 + }, + { + "epoch": 2.791044776119403, + "grad_norm": 0.7635155916213989, + "learning_rate": 4.081329096036829e-07, + "loss": 0.0933, + "step": 935 + }, + { + "epoch": 2.8059701492537314, + "grad_norm": 0.8901118636131287, + "learning_rate": 3.528920374977979e-07, + "loss": 0.0881, + "step": 940 + }, + { + "epoch": 2.8208955223880596, + "grad_norm": 0.7250303030014038, + "learning_rate": 3.0162202996468156e-07, + "loss": 0.0816, + "step": 945 + }, + { + "epoch": 2.835820895522388, + "grad_norm": 0.859161913394928, + "learning_rate": 2.5433678642100664e-07, + "loss": 0.098, + "step": 950 + }, + { + "epoch": 2.8507462686567164, + "grad_norm": 0.7967370748519897, + "learning_rate": 2.110491260047792e-07, + "loss": 0.096, + "step": 955 + }, + { + "epoch": 2.8656716417910446, + "grad_norm": 0.8971421718597412, + "learning_rate": 1.7177078410005041e-07, + "loss": 0.0964, + "step": 960 + }, + { + "epoch": 2.8805970149253732, + "grad_norm": 0.9559134244918823, + "learning_rate": 1.3651240915542652e-07, + "loss": 0.1026, + "step": 965 + }, + { + "epoch": 2.8955223880597014, + "grad_norm": 0.7488626837730408, + "learning_rate": 1.0528355979724624e-07, + "loss": 0.0897, + "step": 970 + }, + { + "epoch": 2.91044776119403, + "grad_norm": 0.701232373714447, + "learning_rate": 7.809270223821552e-08, + "loss": 0.0772, + "step": 975 + }, + { + "epoch": 2.925373134328358, + "grad_norm": 0.7586269378662109, + "learning_rate": 5.4947207982204985e-08, + "loss": 0.0842, + "step": 980 + }, + { + "epoch": 2.9402985074626864, + "grad_norm": 0.770429790019989, + "learning_rate": 3.585335182580529e-08, + "loss": 0.1077, + "step": 985 + }, + { + "epoch": 2.955223880597015, + "grad_norm": 0.648844301700592, + "learning_rate": 2.0816310157227846e-08, + "loss": 0.0898, + "step": 990 + }, + { + "epoch": 2.970149253731343, + "grad_norm": 0.7136843800544739, + "learning_rate": 9.840159552969019e-09, + "loss": 0.083, + "step": 995 + }, + { + "epoch": 2.9850746268656714, + "grad_norm": 0.7951557636260986, + "learning_rate": 2.9278756726375257e-09, + "loss": 0.0959, + "step": 1000 + }, + { + "epoch": 3.0, + "grad_norm": 0.842806339263916, + "learning_rate": 8.133245225305785e-11, + "loss": 0.099, + "step": 1005 + }, + { + "epoch": 3.0, + "step": 1005, + "total_flos": 1.2667695529522627e+18, + "train_loss": 0.42366581120301244, + "train_runtime": 1008.4374, + "train_samples_per_second": 31.891, + "train_steps_per_second": 0.997 + } + ], + "logging_steps": 5, + "max_steps": 1005, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2667695529522627e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdd02782d1810ce97a4247a1ebd42bf16fdb5568 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/4_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5041a874496312068e64aad43b14e2b622f8b3e7ca5bfd3f7119baf145f45f3 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3eff8f9166c7aca53bfc4f8e60702571cfd36888 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 5_128_e3_3e-5 + results: [] +--- + + + +# 5_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a4b1cb8e9eee6cc98965b8c8d7a2b31c5079068d --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "o_proj", + "up_proj", + "v_proj", + "gate_proj", + "q_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d6870fa47b394325588c536f514ad514ac7b1f6 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e5e5d09278320cb809ce3525287ddbb58c12359cc9115a0390e4db4ad7699af +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..994eaa347179c6afda61ef673a1198013b304621 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.382977535502975e+18, + "train_loss": 0.42970009316403573, + "train_runtime": 1112.9911, + "train_samples": 11683, + "train_samples_per_second": 31.491, + "train_steps_per_second": 0.987 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..994eaa347179c6afda61ef673a1198013b304621 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.382977535502975e+18, + "train_loss": 0.42970009316403573, + "train_runtime": 1112.9911, + "train_samples": 11683, + "train_samples_per_second": 31.491, + "train_steps_per_second": 0.987 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7072690ef033926f345943cd076efe451ef99657 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1576 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1098, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013689253935660506, + "grad_norm": 0.5928049087524414, + "learning_rate": 2.181818181818182e-06, + "loss": 1.6495, + "step": 5 + }, + { + "epoch": 0.02737850787132101, + "grad_norm": 0.6891129016876221, + "learning_rate": 4.90909090909091e-06, + "loss": 1.5274, + "step": 10 + }, + { + "epoch": 0.04106776180698152, + "grad_norm": 0.5830374360084534, + "learning_rate": 7.636363636363636e-06, + "loss": 1.503, + "step": 15 + }, + { + "epoch": 0.05475701574264202, + "grad_norm": 0.5786980986595154, + "learning_rate": 1.0363636363636364e-05, + "loss": 1.5385, + "step": 20 + }, + { + "epoch": 0.06844626967830253, + "grad_norm": 0.49221134185791016, + "learning_rate": 1.309090909090909e-05, + "loss": 1.4763, + "step": 25 + }, + { + "epoch": 0.08213552361396304, + "grad_norm": 0.5124127864837646, + "learning_rate": 1.5818181818181818e-05, + "loss": 1.5516, + "step": 30 + }, + { + "epoch": 0.09582477754962354, + "grad_norm": 0.5171751976013184, + "learning_rate": 1.8545454545454545e-05, + "loss": 1.5729, + "step": 35 + }, + { + "epoch": 0.10951403148528405, + "grad_norm": 0.4481073021888733, + "learning_rate": 2.1272727272727273e-05, + "loss": 1.4787, + "step": 40 + }, + { + "epoch": 0.12320328542094455, + "grad_norm": 0.4905588924884796, + "learning_rate": 2.4e-05, + "loss": 1.4494, + "step": 45 + }, + { + "epoch": 0.13689253935660506, + "grad_norm": 0.5428522825241089, + "learning_rate": 2.6727272727272728e-05, + "loss": 1.4442, + "step": 50 + }, + { + "epoch": 0.15058179329226556, + "grad_norm": 0.6061527132987976, + "learning_rate": 2.9454545454545456e-05, + "loss": 1.4938, + "step": 55 + }, + { + "epoch": 0.16427104722792607, + "grad_norm": 0.5363776087760925, + "learning_rate": 2.9998911302762293e-05, + "loss": 1.3451, + "step": 60 + }, + { + "epoch": 0.17796030116358658, + "grad_norm": 0.5691450238227844, + "learning_rate": 2.9994488741083514e-05, + "loss": 1.4411, + "step": 65 + }, + { + "epoch": 0.19164955509924708, + "grad_norm": 0.5887356400489807, + "learning_rate": 2.9986665273697548e-05, + "loss": 1.3288, + "step": 70 + }, + { + "epoch": 0.2053388090349076, + "grad_norm": 0.5945854187011719, + "learning_rate": 2.997544267504801e-05, + "loss": 1.335, + "step": 75 + }, + { + "epoch": 0.2190280629705681, + "grad_norm": 0.6412392258644104, + "learning_rate": 2.9960823490536772e-05, + "loss": 1.3271, + "step": 80 + }, + { + "epoch": 0.2327173169062286, + "grad_norm": 0.6606377363204956, + "learning_rate": 2.9942811035946656e-05, + "loss": 1.267, + "step": 85 + }, + { + "epoch": 0.2464065708418891, + "grad_norm": 0.6199719905853271, + "learning_rate": 2.9921409396689347e-05, + "loss": 1.2498, + "step": 90 + }, + { + "epoch": 0.2600958247775496, + "grad_norm": 0.6670178174972534, + "learning_rate": 2.9896623426878805e-05, + "loss": 1.1987, + "step": 95 + }, + { + "epoch": 0.2737850787132101, + "grad_norm": 0.7033961415290833, + "learning_rate": 2.9868458748230293e-05, + "loss": 1.1502, + "step": 100 + }, + { + "epoch": 0.2874743326488706, + "grad_norm": 0.7398504018783569, + "learning_rate": 2.983692174878531e-05, + "loss": 1.0955, + "step": 105 + }, + { + "epoch": 0.30116358658453113, + "grad_norm": 0.8326859474182129, + "learning_rate": 2.980201958146272e-05, + "loss": 1.1422, + "step": 110 + }, + { + "epoch": 0.31485284052019163, + "grad_norm": 0.811793327331543, + "learning_rate": 2.9763760162436405e-05, + "loss": 1.1345, + "step": 115 + }, + { + "epoch": 0.32854209445585214, + "grad_norm": 0.8068777918815613, + "learning_rate": 2.9722152169339765e-05, + "loss": 1.1152, + "step": 120 + }, + { + "epoch": 0.34223134839151265, + "grad_norm": 0.9504799842834473, + "learning_rate": 2.967720503929759e-05, + "loss": 1.0244, + "step": 125 + }, + { + "epoch": 0.35592060232717315, + "grad_norm": 0.816230297088623, + "learning_rate": 2.962892896678557e-05, + "loss": 1.0203, + "step": 130 + }, + { + "epoch": 0.36960985626283366, + "grad_norm": 0.992814302444458, + "learning_rate": 2.9577334901318115e-05, + "loss": 0.9981, + "step": 135 + }, + { + "epoch": 0.38329911019849416, + "grad_norm": 0.8164167404174805, + "learning_rate": 2.952243454496488e-05, + "loss": 0.9909, + "step": 140 + }, + { + "epoch": 0.39698836413415467, + "grad_norm": 0.9686955213546753, + "learning_rate": 2.9464240349696625e-05, + "loss": 0.9359, + "step": 145 + }, + { + "epoch": 0.4106776180698152, + "grad_norm": 0.8472756743431091, + "learning_rate": 2.9402765514560955e-05, + "loss": 1.0371, + "step": 150 + }, + { + "epoch": 0.4243668720054757, + "grad_norm": 0.9787071347236633, + "learning_rate": 2.9338023982688657e-05, + "loss": 0.8961, + "step": 155 + }, + { + "epoch": 0.4380561259411362, + "grad_norm": 1.0443791151046753, + "learning_rate": 2.9270030438131263e-05, + "loss": 0.9193, + "step": 160 + }, + { + "epoch": 0.4517453798767967, + "grad_norm": 1.0254266262054443, + "learning_rate": 2.9198800302530532e-05, + "loss": 0.9052, + "step": 165 + }, + { + "epoch": 0.4654346338124572, + "grad_norm": 1.05888831615448, + "learning_rate": 2.912434973162067e-05, + "loss": 0.8032, + "step": 170 + }, + { + "epoch": 0.4791238877481177, + "grad_norm": 1.002254605293274, + "learning_rate": 2.904669561156404e-05, + "loss": 0.8621, + "step": 175 + }, + { + "epoch": 0.4928131416837782, + "grad_norm": 0.9362477660179138, + "learning_rate": 2.8965855555121216e-05, + "loss": 0.8928, + "step": 180 + }, + { + "epoch": 0.5065023956194388, + "grad_norm": 1.2419685125350952, + "learning_rate": 2.8881847897656224e-05, + "loss": 0.7808, + "step": 185 + }, + { + "epoch": 0.5201916495550992, + "grad_norm": 1.0579568147659302, + "learning_rate": 2.879469169297787e-05, + "loss": 0.7638, + "step": 190 + }, + { + "epoch": 0.5338809034907598, + "grad_norm": 1.2357258796691895, + "learning_rate": 2.870440670901816e-05, + "loss": 0.825, + "step": 195 + }, + { + "epoch": 0.5475701574264202, + "grad_norm": 1.0414958000183105, + "learning_rate": 2.8611013423348727e-05, + "loss": 0.7161, + "step": 200 + }, + { + "epoch": 0.5612594113620808, + "grad_norm": 1.0498273372650146, + "learning_rate": 2.8514533018536286e-05, + "loss": 0.8646, + "step": 205 + }, + { + "epoch": 0.5749486652977412, + "grad_norm": 1.3027129173278809, + "learning_rate": 2.841498737733824e-05, + "loss": 0.6918, + "step": 210 + }, + { + "epoch": 0.5886379192334018, + "grad_norm": 1.1668156385421753, + "learning_rate": 2.8312399077739407e-05, + "loss": 0.7298, + "step": 215 + }, + { + "epoch": 0.6023271731690623, + "grad_norm": 1.015423059463501, + "learning_rate": 2.8206791387831136e-05, + "loss": 0.788, + "step": 220 + }, + { + "epoch": 0.6160164271047228, + "grad_norm": 1.269215703010559, + "learning_rate": 2.8098188260533818e-05, + "loss": 0.7228, + "step": 225 + }, + { + "epoch": 0.6297056810403833, + "grad_norm": 1.304238200187683, + "learning_rate": 2.7986614328164168e-05, + "loss": 0.6988, + "step": 230 + }, + { + "epoch": 0.6433949349760438, + "grad_norm": 1.1419962644577026, + "learning_rate": 2.7872094896848307e-05, + "loss": 0.7123, + "step": 235 + }, + { + "epoch": 0.6570841889117043, + "grad_norm": 1.0552401542663574, + "learning_rate": 2.7754655940782117e-05, + "loss": 0.7137, + "step": 240 + }, + { + "epoch": 0.6707734428473648, + "grad_norm": 1.171123743057251, + "learning_rate": 2.763432409633998e-05, + "loss": 0.6726, + "step": 245 + }, + { + "epoch": 0.6844626967830253, + "grad_norm": 1.1724942922592163, + "learning_rate": 2.751112665603341e-05, + "loss": 0.6745, + "step": 250 + }, + { + "epoch": 0.6981519507186859, + "grad_norm": 1.1300872564315796, + "learning_rate": 2.7385091562320808e-05, + "loss": 0.652, + "step": 255 + }, + { + "epoch": 0.7118412046543463, + "grad_norm": 1.1612063646316528, + "learning_rate": 2.7256247401269814e-05, + "loss": 0.6523, + "step": 260 + }, + { + "epoch": 0.7255304585900069, + "grad_norm": 1.1699851751327515, + "learning_rate": 2.7124623396073715e-05, + "loss": 0.6397, + "step": 265 + }, + { + "epoch": 0.7392197125256673, + "grad_norm": 1.2017160654067993, + "learning_rate": 2.6990249400423305e-05, + "loss": 0.5969, + "step": 270 + }, + { + "epoch": 0.7529089664613279, + "grad_norm": 1.253274917602539, + "learning_rate": 2.685315589173576e-05, + "loss": 0.6876, + "step": 275 + }, + { + "epoch": 0.7665982203969883, + "grad_norm": 1.1968927383422852, + "learning_rate": 2.6713373964242043e-05, + "loss": 0.6246, + "step": 280 + }, + { + "epoch": 0.7802874743326489, + "grad_norm": 1.4252917766571045, + "learning_rate": 2.6570935321934417e-05, + "loss": 0.5826, + "step": 285 + }, + { + "epoch": 0.7939767282683093, + "grad_norm": 1.224582552909851, + "learning_rate": 2.642587227137564e-05, + "loss": 0.5852, + "step": 290 + }, + { + "epoch": 0.8076659822039699, + "grad_norm": 1.3189269304275513, + "learning_rate": 2.6278217714371496e-05, + "loss": 0.5856, + "step": 295 + }, + { + "epoch": 0.8213552361396304, + "grad_norm": 1.253151535987854, + "learning_rate": 2.612800514050836e-05, + "loss": 0.5994, + "step": 300 + }, + { + "epoch": 0.8350444900752909, + "grad_norm": 1.2955926656723022, + "learning_rate": 2.597526861955736e-05, + "loss": 0.5605, + "step": 305 + }, + { + "epoch": 0.8487337440109514, + "grad_norm": 1.412084698677063, + "learning_rate": 2.582004279374704e-05, + "loss": 0.5549, + "step": 310 + }, + { + "epoch": 0.8624229979466119, + "grad_norm": 1.187665343284607, + "learning_rate": 2.5662362869906123e-05, + "loss": 0.5314, + "step": 315 + }, + { + "epoch": 0.8761122518822724, + "grad_norm": 1.3789371252059937, + "learning_rate": 2.5502264611478238e-05, + "loss": 0.5382, + "step": 320 + }, + { + "epoch": 0.8898015058179329, + "grad_norm": 1.3071212768554688, + "learning_rate": 2.5339784330410413e-05, + "loss": 0.5399, + "step": 325 + }, + { + "epoch": 0.9034907597535934, + "grad_norm": 1.2759591341018677, + "learning_rate": 2.5174958878917135e-05, + "loss": 0.4737, + "step": 330 + }, + { + "epoch": 0.917180013689254, + "grad_norm": 1.1933642625808716, + "learning_rate": 2.500782564112188e-05, + "loss": 0.5197, + "step": 335 + }, + { + "epoch": 0.9308692676249144, + "grad_norm": 1.2646616697311401, + "learning_rate": 2.4838422524578027e-05, + "loss": 0.5153, + "step": 340 + }, + { + "epoch": 0.944558521560575, + "grad_norm": 1.1475528478622437, + "learning_rate": 2.4666787951671013e-05, + "loss": 0.4641, + "step": 345 + }, + { + "epoch": 0.9582477754962354, + "grad_norm": 1.1558189392089844, + "learning_rate": 2.4492960850903757e-05, + "loss": 0.4948, + "step": 350 + }, + { + "epoch": 0.971937029431896, + "grad_norm": 1.2516051530838013, + "learning_rate": 2.4316980648067225e-05, + "loss": 0.5147, + "step": 355 + }, + { + "epoch": 0.9856262833675564, + "grad_norm": 1.3212283849716187, + "learning_rate": 2.4138887257298317e-05, + "loss": 0.4966, + "step": 360 + }, + { + "epoch": 0.999315537303217, + "grad_norm": 1.342965841293335, + "learning_rate": 2.3958721072026893e-05, + "loss": 0.4402, + "step": 365 + }, + { + "epoch": 1.0109514031485285, + "grad_norm": 1.456713318824768, + "learning_rate": 2.3776522955814094e-05, + "loss": 0.3705, + "step": 370 + }, + { + "epoch": 1.0246406570841888, + "grad_norm": 1.3196852207183838, + "learning_rate": 2.3592334233084115e-05, + "loss": 0.4169, + "step": 375 + }, + { + "epoch": 1.0383299110198494, + "grad_norm": 1.243699550628662, + "learning_rate": 2.3406196679751376e-05, + "loss": 0.4262, + "step": 380 + }, + { + "epoch": 1.05201916495551, + "grad_norm": 1.1757159233093262, + "learning_rate": 2.3218152513745306e-05, + "loss": 0.4117, + "step": 385 + }, + { + "epoch": 1.0657084188911705, + "grad_norm": 1.320862054824829, + "learning_rate": 2.3028244385434863e-05, + "loss": 0.3882, + "step": 390 + }, + { + "epoch": 1.0793976728268309, + "grad_norm": 1.4081153869628906, + "learning_rate": 2.283651536795504e-05, + "loss": 0.3621, + "step": 395 + }, + { + "epoch": 1.0930869267624914, + "grad_norm": 1.2154595851898193, + "learning_rate": 2.2643008947437368e-05, + "loss": 0.3786, + "step": 400 + }, + { + "epoch": 1.106776180698152, + "grad_norm": 1.217887282371521, + "learning_rate": 2.244776901314685e-05, + "loss": 0.3701, + "step": 405 + }, + { + "epoch": 1.1204654346338125, + "grad_norm": 1.4208720922470093, + "learning_rate": 2.22508398475274e-05, + "loss": 0.3657, + "step": 410 + }, + { + "epoch": 1.1341546885694729, + "grad_norm": 1.3144091367721558, + "learning_rate": 2.2052266116158157e-05, + "loss": 0.3816, + "step": 415 + }, + { + "epoch": 1.1478439425051334, + "grad_norm": 1.4254578351974487, + "learning_rate": 2.185209285762281e-05, + "loss": 0.3332, + "step": 420 + }, + { + "epoch": 1.161533196440794, + "grad_norm": 1.2243602275848389, + "learning_rate": 2.165036547329444e-05, + "loss": 0.3804, + "step": 425 + }, + { + "epoch": 1.1752224503764546, + "grad_norm": 1.1913032531738281, + "learning_rate": 2.144712971703799e-05, + "loss": 0.3216, + "step": 430 + }, + { + "epoch": 1.188911704312115, + "grad_norm": 1.2852219343185425, + "learning_rate": 2.1242431684832802e-05, + "loss": 0.3544, + "step": 435 + }, + { + "epoch": 1.2026009582477755, + "grad_norm": 1.3050099611282349, + "learning_rate": 2.103631780431759e-05, + "loss": 0.3416, + "step": 440 + }, + { + "epoch": 1.216290212183436, + "grad_norm": 1.2847477197647095, + "learning_rate": 2.0828834824260168e-05, + "loss": 0.366, + "step": 445 + }, + { + "epoch": 1.2299794661190966, + "grad_norm": 1.2032654285430908, + "learning_rate": 2.062002980395433e-05, + "loss": 0.3467, + "step": 450 + }, + { + "epoch": 1.243668720054757, + "grad_norm": 1.310543417930603, + "learning_rate": 2.0409950102546334e-05, + "loss": 0.3214, + "step": 455 + }, + { + "epoch": 1.2573579739904175, + "grad_norm": 1.2932744026184082, + "learning_rate": 2.0198643368293328e-05, + "loss": 0.3396, + "step": 460 + }, + { + "epoch": 1.271047227926078, + "grad_norm": 1.3147145509719849, + "learning_rate": 1.998615752775626e-05, + "loss": 0.3073, + "step": 465 + }, + { + "epoch": 1.2847364818617386, + "grad_norm": 1.3344918489456177, + "learning_rate": 1.9772540774929624e-05, + "loss": 0.3085, + "step": 470 + }, + { + "epoch": 1.298425735797399, + "grad_norm": 1.358870267868042, + "learning_rate": 1.9557841560310556e-05, + "loss": 0.3052, + "step": 475 + }, + { + "epoch": 1.3121149897330595, + "grad_norm": 1.431424617767334, + "learning_rate": 1.934210857990977e-05, + "loss": 0.3292, + "step": 480 + }, + { + "epoch": 1.32580424366872, + "grad_norm": 1.1961171627044678, + "learning_rate": 1.912539076420678e-05, + "loss": 0.3013, + "step": 485 + }, + { + "epoch": 1.3394934976043806, + "grad_norm": 1.276461124420166, + "learning_rate": 1.890773726705198e-05, + "loss": 0.319, + "step": 490 + }, + { + "epoch": 1.3531827515400412, + "grad_norm": 1.2716048955917358, + "learning_rate": 1.8689197454518034e-05, + "loss": 0.2806, + "step": 495 + }, + { + "epoch": 1.3668720054757015, + "grad_norm": 1.091530680656433, + "learning_rate": 1.846982089370312e-05, + "loss": 0.2784, + "step": 500 + }, + { + "epoch": 1.380561259411362, + "grad_norm": 1.2333450317382812, + "learning_rate": 1.824965734148863e-05, + "loss": 0.2831, + "step": 505 + }, + { + "epoch": 1.3942505133470227, + "grad_norm": 1.3155099153518677, + "learning_rate": 1.8028756733253758e-05, + "loss": 0.307, + "step": 510 + }, + { + "epoch": 1.407939767282683, + "grad_norm": 1.2477658987045288, + "learning_rate": 1.7807169171549677e-05, + "loss": 0.2502, + "step": 515 + }, + { + "epoch": 1.4216290212183436, + "grad_norm": 1.351386308670044, + "learning_rate": 1.7584944914735713e-05, + "loss": 0.3021, + "step": 520 + }, + { + "epoch": 1.4353182751540041, + "grad_norm": 1.2146998643875122, + "learning_rate": 1.7362134365580268e-05, + "loss": 0.2819, + "step": 525 + }, + { + "epoch": 1.4490075290896647, + "grad_norm": 1.2512556314468384, + "learning_rate": 1.7138788059828935e-05, + "loss": 0.2773, + "step": 530 + }, + { + "epoch": 1.4626967830253252, + "grad_norm": 1.3788397312164307, + "learning_rate": 1.6914956654742454e-05, + "loss": 0.2332, + "step": 535 + }, + { + "epoch": 1.4763860369609856, + "grad_norm": 1.2822272777557373, + "learning_rate": 1.6690690917607138e-05, + "loss": 0.2394, + "step": 540 + }, + { + "epoch": 1.4900752908966461, + "grad_norm": 1.4335960149765015, + "learning_rate": 1.6466041714220316e-05, + "loss": 0.2379, + "step": 545 + }, + { + "epoch": 1.5037645448323067, + "grad_norm": 1.4007731676101685, + "learning_rate": 1.6241059997353442e-05, + "loss": 0.2791, + "step": 550 + }, + { + "epoch": 1.517453798767967, + "grad_norm": 1.2620443105697632, + "learning_rate": 1.6015796795195485e-05, + "loss": 0.2639, + "step": 555 + }, + { + "epoch": 1.5311430527036276, + "grad_norm": 1.2049144506454468, + "learning_rate": 1.5790303199779194e-05, + "loss": 0.2472, + "step": 560 + }, + { + "epoch": 1.5448323066392882, + "grad_norm": 1.314141035079956, + "learning_rate": 1.5564630355392902e-05, + "loss": 0.2164, + "step": 565 + }, + { + "epoch": 1.5585215605749485, + "grad_norm": 1.4751919507980347, + "learning_rate": 1.5338829446980464e-05, + "loss": 0.2558, + "step": 570 + }, + { + "epoch": 1.5722108145106093, + "grad_norm": 1.534978985786438, + "learning_rate": 1.5112951688532002e-05, + "loss": 0.2454, + "step": 575 + }, + { + "epoch": 1.5859000684462696, + "grad_norm": 1.379879355430603, + "learning_rate": 1.4887048311468002e-05, + "loss": 0.2311, + "step": 580 + }, + { + "epoch": 1.5995893223819302, + "grad_norm": 1.3975728750228882, + "learning_rate": 1.4661170553019537e-05, + "loss": 0.2376, + "step": 585 + }, + { + "epoch": 1.6132785763175908, + "grad_norm": 1.3899104595184326, + "learning_rate": 1.4435369644607104e-05, + "loss": 0.2281, + "step": 590 + }, + { + "epoch": 1.626967830253251, + "grad_norm": 1.1865293979644775, + "learning_rate": 1.4209696800220807e-05, + "loss": 0.2672, + "step": 595 + }, + { + "epoch": 1.6406570841889117, + "grad_norm": 1.2229032516479492, + "learning_rate": 1.3984203204804517e-05, + "loss": 0.2364, + "step": 600 + }, + { + "epoch": 1.6543463381245722, + "grad_norm": 1.1632156372070312, + "learning_rate": 1.3758940002646562e-05, + "loss": 0.1885, + "step": 605 + }, + { + "epoch": 1.6680355920602326, + "grad_norm": 1.2751914262771606, + "learning_rate": 1.3533958285779687e-05, + "loss": 0.2205, + "step": 610 + }, + { + "epoch": 1.6817248459958933, + "grad_norm": 1.3402711153030396, + "learning_rate": 1.3309309082392864e-05, + "loss": 0.2109, + "step": 615 + }, + { + "epoch": 1.6954140999315537, + "grad_norm": 1.3395179510116577, + "learning_rate": 1.3085043345257553e-05, + "loss": 0.2318, + "step": 620 + }, + { + "epoch": 1.7091033538672142, + "grad_norm": 1.2485102415084839, + "learning_rate": 1.2861211940171067e-05, + "loss": 0.207, + "step": 625 + }, + { + "epoch": 1.7227926078028748, + "grad_norm": 1.1756024360656738, + "learning_rate": 1.2637865634419735e-05, + "loss": 0.2191, + "step": 630 + }, + { + "epoch": 1.7364818617385351, + "grad_norm": 1.4641789197921753, + "learning_rate": 1.2415055085264289e-05, + "loss": 0.2371, + "step": 635 + }, + { + "epoch": 1.750171115674196, + "grad_norm": 1.2560603618621826, + "learning_rate": 1.2192830828450327e-05, + "loss": 0.237, + "step": 640 + }, + { + "epoch": 1.7638603696098563, + "grad_norm": 1.265170693397522, + "learning_rate": 1.1971243266746243e-05, + "loss": 0.2025, + "step": 645 + }, + { + "epoch": 1.7775496235455168, + "grad_norm": 1.3927359580993652, + "learning_rate": 1.175034265851137e-05, + "loss": 0.2119, + "step": 650 + }, + { + "epoch": 1.7912388774811774, + "grad_norm": 1.1295206546783447, + "learning_rate": 1.1530179106296881e-05, + "loss": 0.199, + "step": 655 + }, + { + "epoch": 1.8049281314168377, + "grad_norm": 1.3824551105499268, + "learning_rate": 1.131080254548197e-05, + "loss": 0.1787, + "step": 660 + }, + { + "epoch": 1.8186173853524983, + "grad_norm": 1.203173279762268, + "learning_rate": 1.1092262732948017e-05, + "loss": 0.1882, + "step": 665 + }, + { + "epoch": 1.8323066392881588, + "grad_norm": 1.2954728603363037, + "learning_rate": 1.0874609235793222e-05, + "loss": 0.1829, + "step": 670 + }, + { + "epoch": 1.8459958932238192, + "grad_norm": 1.3682695627212524, + "learning_rate": 1.0657891420090236e-05, + "loss": 0.1673, + "step": 675 + }, + { + "epoch": 1.85968514715948, + "grad_norm": 1.071696162223816, + "learning_rate": 1.0442158439689444e-05, + "loss": 0.1978, + "step": 680 + }, + { + "epoch": 1.8733744010951403, + "grad_norm": 1.079213261604309, + "learning_rate": 1.0227459225070379e-05, + "loss": 0.177, + "step": 685 + }, + { + "epoch": 1.8870636550308009, + "grad_norm": 1.1991767883300781, + "learning_rate": 1.0013842472243742e-05, + "loss": 0.1686, + "step": 690 + }, + { + "epoch": 1.9007529089664614, + "grad_norm": 1.0825111865997314, + "learning_rate": 9.801356631706676e-06, + "loss": 0.1757, + "step": 695 + }, + { + "epoch": 1.9144421629021218, + "grad_norm": 1.2867668867111206, + "learning_rate": 9.590049897453668e-06, + "loss": 0.163, + "step": 700 + }, + { + "epoch": 1.9281314168377823, + "grad_norm": 1.1242690086364746, + "learning_rate": 9.379970196045672e-06, + "loss": 0.1718, + "step": 705 + }, + { + "epoch": 1.941820670773443, + "grad_norm": 1.0667998790740967, + "learning_rate": 9.171165175739832e-06, + "loss": 0.1495, + "step": 710 + }, + { + "epoch": 1.9555099247091032, + "grad_norm": 1.1642811298370361, + "learning_rate": 8.96368219568241e-06, + "loss": 0.1543, + "step": 715 + }, + { + "epoch": 1.969199178644764, + "grad_norm": 1.2360124588012695, + "learning_rate": 8.7575683151672e-06, + "loss": 0.1791, + "step": 720 + }, + { + "epoch": 1.9828884325804244, + "grad_norm": 1.118776798248291, + "learning_rate": 8.552870282962012e-06, + "loss": 0.1796, + "step": 725 + }, + { + "epoch": 1.996577686516085, + "grad_norm": 1.040224313735962, + "learning_rate": 8.349634526705558e-06, + "loss": 0.1721, + "step": 730 + }, + { + "epoch": 2.008213552361396, + "grad_norm": 1.0651005506515503, + "learning_rate": 8.147907142377198e-06, + "loss": 0.1294, + "step": 735 + }, + { + "epoch": 2.021902806297057, + "grad_norm": 1.5293514728546143, + "learning_rate": 7.947733883841847e-06, + "loss": 0.1345, + "step": 740 + }, + { + "epoch": 2.0355920602327173, + "grad_norm": 1.0902165174484253, + "learning_rate": 7.749160152472603e-06, + "loss": 0.1373, + "step": 745 + }, + { + "epoch": 2.0492813141683777, + "grad_norm": 1.252886176109314, + "learning_rate": 7.552230986853153e-06, + "loss": 0.1338, + "step": 750 + }, + { + "epoch": 2.0629705681040384, + "grad_norm": 1.094232201576233, + "learning_rate": 7.35699105256263e-06, + "loss": 0.1184, + "step": 755 + }, + { + "epoch": 2.076659822039699, + "grad_norm": 0.9510800838470459, + "learning_rate": 7.1634846320449625e-06, + "loss": 0.1312, + "step": 760 + }, + { + "epoch": 2.090349075975359, + "grad_norm": 0.9192227125167847, + "learning_rate": 6.971755614565131e-06, + "loss": 0.1098, + "step": 765 + }, + { + "epoch": 2.10403832991102, + "grad_norm": 0.9396990537643433, + "learning_rate": 6.781847486254698e-06, + "loss": 0.1108, + "step": 770 + }, + { + "epoch": 2.1177275838466803, + "grad_norm": 1.1747150421142578, + "learning_rate": 6.593803320248625e-06, + "loss": 0.1109, + "step": 775 + }, + { + "epoch": 2.131416837782341, + "grad_norm": 1.0359351634979248, + "learning_rate": 6.407665766915886e-06, + "loss": 0.1088, + "step": 780 + }, + { + "epoch": 2.1451060917180014, + "grad_norm": 1.1016526222229004, + "learning_rate": 6.223477044185909e-06, + "loss": 0.1165, + "step": 785 + }, + { + "epoch": 2.1587953456536617, + "grad_norm": 1.07218599319458, + "learning_rate": 6.04127892797311e-06, + "loss": 0.1337, + "step": 790 + }, + { + "epoch": 2.1724845995893225, + "grad_norm": 0.8604865670204163, + "learning_rate": 5.861112742701678e-06, + "loss": 0.1037, + "step": 795 + }, + { + "epoch": 2.186173853524983, + "grad_norm": 0.9908950328826904, + "learning_rate": 5.683019351932775e-06, + "loss": 0.1213, + "step": 800 + }, + { + "epoch": 2.1998631074606436, + "grad_norm": 1.0044302940368652, + "learning_rate": 5.507039149096251e-06, + "loss": 0.0984, + "step": 805 + }, + { + "epoch": 2.213552361396304, + "grad_norm": 1.0039098262786865, + "learning_rate": 5.333212048328983e-06, + "loss": 0.1166, + "step": 810 + }, + { + "epoch": 2.2272416153319643, + "grad_norm": 1.092068076133728, + "learning_rate": 5.161577475421978e-06, + "loss": 0.1148, + "step": 815 + }, + { + "epoch": 2.240930869267625, + "grad_norm": 0.9727234840393066, + "learning_rate": 4.992174358878126e-06, + "loss": 0.1004, + "step": 820 + }, + { + "epoch": 2.2546201232032854, + "grad_norm": 1.0300990343093872, + "learning_rate": 4.82504112108287e-06, + "loss": 0.1198, + "step": 825 + }, + { + "epoch": 2.2683093771389458, + "grad_norm": 0.8549792170524597, + "learning_rate": 4.660215669589589e-06, + "loss": 0.1131, + "step": 830 + }, + { + "epoch": 2.2819986310746065, + "grad_norm": 0.925682783126831, + "learning_rate": 4.497735388521762e-06, + "loss": 0.0947, + "step": 835 + }, + { + "epoch": 2.295687885010267, + "grad_norm": 1.069482445716858, + "learning_rate": 4.337637130093879e-06, + "loss": 0.0972, + "step": 840 + }, + { + "epoch": 2.309377138945927, + "grad_norm": 1.0716198682785034, + "learning_rate": 4.179957206252962e-06, + "loss": 0.1155, + "step": 845 + }, + { + "epoch": 2.323066392881588, + "grad_norm": 0.9506390690803528, + "learning_rate": 4.0247313804426455e-06, + "loss": 0.1002, + "step": 850 + }, + { + "epoch": 2.3367556468172483, + "grad_norm": 0.8912640810012817, + "learning_rate": 3.871994859491643e-06, + "loss": 0.111, + "step": 855 + }, + { + "epoch": 2.350444900752909, + "grad_norm": 0.7894532680511475, + "learning_rate": 3.7217822856285087e-06, + "loss": 0.0912, + "step": 860 + }, + { + "epoch": 2.3641341546885695, + "grad_norm": 1.0005379915237427, + "learning_rate": 3.574127728624365e-06, + "loss": 0.103, + "step": 865 + }, + { + "epoch": 2.37782340862423, + "grad_norm": 0.9462756514549255, + "learning_rate": 3.429064678065584e-06, + "loss": 0.1041, + "step": 870 + }, + { + "epoch": 2.3915126625598906, + "grad_norm": 0.948407769203186, + "learning_rate": 3.28662603575796e-06, + "loss": 0.0904, + "step": 875 + }, + { + "epoch": 2.405201916495551, + "grad_norm": 0.8511401414871216, + "learning_rate": 3.1468441082642396e-06, + "loss": 0.0983, + "step": 880 + }, + { + "epoch": 2.4188911704312117, + "grad_norm": 1.04668390750885, + "learning_rate": 3.009750599576698e-06, + "loss": 0.1042, + "step": 885 + }, + { + "epoch": 2.432580424366872, + "grad_norm": 0.8327038884162903, + "learning_rate": 2.8753766039262872e-06, + "loss": 0.1069, + "step": 890 + }, + { + "epoch": 2.4462696783025324, + "grad_norm": 1.0867719650268555, + "learning_rate": 2.7437525987301887e-06, + "loss": 0.131, + "step": 895 + }, + { + "epoch": 2.459958932238193, + "grad_norm": 0.8856124877929688, + "learning_rate": 2.614908437679195e-06, + "loss": 0.1021, + "step": 900 + }, + { + "epoch": 2.4736481861738535, + "grad_norm": 0.8581922054290771, + "learning_rate": 2.4888733439665895e-06, + "loss": 0.106, + "step": 905 + }, + { + "epoch": 2.487337440109514, + "grad_norm": 0.8411620259284973, + "learning_rate": 2.365675903660019e-06, + "loss": 0.0974, + "step": 910 + }, + { + "epoch": 2.5010266940451746, + "grad_norm": 0.8998392224311829, + "learning_rate": 2.2453440592178837e-06, + "loss": 0.1081, + "step": 915 + }, + { + "epoch": 2.514715947980835, + "grad_norm": 0.8463298082351685, + "learning_rate": 2.1279051031516926e-06, + "loss": 0.0981, + "step": 920 + }, + { + "epoch": 2.5284052019164953, + "grad_norm": 0.8527997732162476, + "learning_rate": 2.013385671835831e-06, + "loss": 0.0914, + "step": 925 + }, + { + "epoch": 2.542094455852156, + "grad_norm": 0.758217990398407, + "learning_rate": 1.9018117394661816e-06, + "loss": 0.0992, + "step": 930 + }, + { + "epoch": 2.5557837097878164, + "grad_norm": 0.9450808167457581, + "learning_rate": 1.7932086121688668e-06, + "loss": 0.0927, + "step": 935 + }, + { + "epoch": 2.569472963723477, + "grad_norm": 0.7543664574623108, + "learning_rate": 1.6876009222605926e-06, + "loss": 0.0911, + "step": 940 + }, + { + "epoch": 2.5831622176591376, + "grad_norm": 0.8310686945915222, + "learning_rate": 1.5850126226617611e-06, + "loss": 0.0915, + "step": 945 + }, + { + "epoch": 2.596851471594798, + "grad_norm": 0.77762371301651, + "learning_rate": 1.4854669814637145e-06, + "loss": 0.0959, + "step": 950 + }, + { + "epoch": 2.6105407255304587, + "grad_norm": 0.8513213992118835, + "learning_rate": 1.388986576651276e-06, + "loss": 0.1152, + "step": 955 + }, + { + "epoch": 2.624229979466119, + "grad_norm": 0.6887306571006775, + "learning_rate": 1.2955932909818403e-06, + "loss": 0.0961, + "step": 960 + }, + { + "epoch": 2.63791923340178, + "grad_norm": 0.8975762128829956, + "learning_rate": 1.2053083070221326e-06, + "loss": 0.0891, + "step": 965 + }, + { + "epoch": 2.65160848733744, + "grad_norm": 1.103402853012085, + "learning_rate": 1.1181521023437751e-06, + "loss": 0.1025, + "step": 970 + }, + { + "epoch": 2.6652977412731005, + "grad_norm": 0.8978127837181091, + "learning_rate": 1.034144444878784e-06, + "loss": 0.0933, + "step": 975 + }, + { + "epoch": 2.6789869952087613, + "grad_norm": 0.6846758127212524, + "learning_rate": 9.533043884359616e-07, + "loss": 0.0836, + "step": 980 + }, + { + "epoch": 2.6926762491444216, + "grad_norm": 0.6819072961807251, + "learning_rate": 8.756502683793366e-07, + "loss": 0.0891, + "step": 985 + }, + { + "epoch": 2.7063655030800824, + "grad_norm": 0.7781175374984741, + "learning_rate": 8.011996974694708e-07, + "loss": 0.0786, + "step": 990 + }, + { + "epoch": 2.7200547570157427, + "grad_norm": 0.9653282761573792, + "learning_rate": 7.299695618687357e-07, + "loss": 0.1215, + "step": 995 + }, + { + "epoch": 2.733744010951403, + "grad_norm": 0.8408863544464111, + "learning_rate": 6.619760173113437e-07, + "loss": 0.0946, + "step": 1000 + }, + { + "epoch": 2.7474332648870634, + "grad_norm": 0.8417702913284302, + "learning_rate": 5.972344854390482e-07, + "loss": 0.089, + "step": 1005 + }, + { + "epoch": 2.761122518822724, + "grad_norm": 0.8843741416931152, + "learning_rate": 5.357596503033773e-07, + "loss": 0.0896, + "step": 1010 + }, + { + "epoch": 2.7748117727583845, + "grad_norm": 0.7742292284965515, + "learning_rate": 4.775654550351194e-07, + "loss": 0.0949, + "step": 1015 + }, + { + "epoch": 2.7885010266940453, + "grad_norm": 0.7977933287620544, + "learning_rate": 4.2266509868188584e-07, + "loss": 0.0971, + "step": 1020 + }, + { + "epoch": 2.8021902806297057, + "grad_norm": 0.7915933132171631, + "learning_rate": 3.7107103321443125e-07, + "loss": 0.1045, + "step": 1025 + }, + { + "epoch": 2.815879534565366, + "grad_norm": 0.8698334097862244, + "learning_rate": 3.2279496070241053e-07, + "loss": 0.1008, + "step": 1030 + }, + { + "epoch": 2.8295687885010268, + "grad_norm": 0.6500616669654846, + "learning_rate": 2.7784783066023553e-07, + "loss": 0.0863, + "step": 1035 + }, + { + "epoch": 2.843258042436687, + "grad_norm": 0.9042031168937683, + "learning_rate": 2.3623983756359825e-07, + "loss": 0.1046, + "step": 1040 + }, + { + "epoch": 2.856947296372348, + "grad_norm": 0.7163382768630981, + "learning_rate": 1.979804185372802e-07, + "loss": 0.0905, + "step": 1045 + }, + { + "epoch": 2.8706365503080082, + "grad_norm": 0.6973674893379211, + "learning_rate": 1.6307825121469165e-07, + "loss": 0.0802, + "step": 1050 + }, + { + "epoch": 2.8843258042436686, + "grad_norm": 0.714806079864502, + "learning_rate": 1.3154125176970732e-07, + "loss": 0.0865, + "step": 1055 + }, + { + "epoch": 2.8980150581793294, + "grad_norm": 0.8166260719299316, + "learning_rate": 1.0337657312119441e-07, + "loss": 0.106, + "step": 1060 + }, + { + "epoch": 2.9117043121149897, + "grad_norm": 0.7619735598564148, + "learning_rate": 7.859060331065371e-08, + "loss": 0.0941, + "step": 1065 + }, + { + "epoch": 2.9253935660506505, + "grad_norm": 0.8009273409843445, + "learning_rate": 5.7188964053345174e-08, + "loss": 0.0849, + "step": 1070 + }, + { + "epoch": 2.939082819986311, + "grad_norm": 0.73676598072052, + "learning_rate": 3.9176509463227926e-08, + "loss": 0.0982, + "step": 1075 + }, + { + "epoch": 2.952772073921971, + "grad_norm": 0.8489168882369995, + "learning_rate": 2.4557324951994253e-08, + "loss": 0.0953, + "step": 1080 + }, + { + "epoch": 2.9664613278576315, + "grad_norm": 0.7190904021263123, + "learning_rate": 1.3334726302454136e-08, + "loss": 0.0941, + "step": 1085 + }, + { + "epoch": 2.9801505817932923, + "grad_norm": 0.7214987277984619, + "learning_rate": 5.511258916485185e-09, + "loss": 0.073, + "step": 1090 + }, + { + "epoch": 2.9938398357289526, + "grad_norm": 0.771611750125885, + "learning_rate": 1.088697237709435e-09, + "loss": 0.0844, + "step": 1095 + }, + { + "epoch": 3.0, + "step": 1098, + "total_flos": 1.382977535502975e+18, + "train_loss": 0.42970009316403573, + "train_runtime": 1112.9911, + "train_samples_per_second": 31.491, + "train_steps_per_second": 0.987 + } + ], + "logging_steps": 5, + "max_steps": 1098, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.382977535502975e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..735e28adb2abb2f33994374f29744997c9f8b811 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/5_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89491b1cbaa954125579932df3d6db9844059d5f92a86dc6e8ce9444a5a7c70d +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfb01e50a5aa13e4d283da51e524f5468913c765 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 6_128_e3_3e-5 + results: [] +--- + + + +# 6_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53d0777b3b5d8c88cf6eff7c119ce21390052614 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "k_proj", + "q_proj", + "up_proj", + "down_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfdb803486122fe283f7d11778f7a51de4c9d8de --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa087ed15c8ea54d921d65c15837a26a933e5c0c438e47809b030ebb27941951 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b94e8ce219d75c45d583c1eac466d476fdfd79dd --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5590581282747187e+18, + "train_loss": 0.4520561478994972, + "train_runtime": 1232.4116, + "train_samples": 12839, + "train_samples_per_second": 31.253, + "train_steps_per_second": 0.979 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b94e8ce219d75c45d583c1eac466d476fdfd79dd --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5590581282747187e+18, + "train_loss": 0.4520561478994972, + "train_runtime": 1232.4116, + "train_samples": 12839, + "train_samples_per_second": 31.253, + "train_steps_per_second": 0.979 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ac08610cb8c365070e8910767dfdae6efaa1f169 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1730 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1206, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012461059190031152, + "grad_norm": 0.6727936863899231, + "learning_rate": 1.9672131147540985e-06, + "loss": 1.5811, + "step": 5 + }, + { + "epoch": 0.024922118380062305, + "grad_norm": 0.5757392644882202, + "learning_rate": 4.426229508196722e-06, + "loss": 1.5618, + "step": 10 + }, + { + "epoch": 0.037383177570093455, + "grad_norm": 0.5858941078186035, + "learning_rate": 6.885245901639345e-06, + "loss": 1.5804, + "step": 15 + }, + { + "epoch": 0.04984423676012461, + "grad_norm": 0.5111885070800781, + "learning_rate": 9.344262295081968e-06, + "loss": 1.5798, + "step": 20 + }, + { + "epoch": 0.06230529595015576, + "grad_norm": 0.47918701171875, + "learning_rate": 1.180327868852459e-05, + "loss": 1.5033, + "step": 25 + }, + { + "epoch": 0.07476635514018691, + "grad_norm": 0.47974151372909546, + "learning_rate": 1.4262295081967213e-05, + "loss": 1.537, + "step": 30 + }, + { + "epoch": 0.08722741433021806, + "grad_norm": 0.48058050870895386, + "learning_rate": 1.6721311475409834e-05, + "loss": 1.5797, + "step": 35 + }, + { + "epoch": 0.09968847352024922, + "grad_norm": 0.5246338844299316, + "learning_rate": 1.9180327868852462e-05, + "loss": 1.4979, + "step": 40 + }, + { + "epoch": 0.11214953271028037, + "grad_norm": 0.4662446975708008, + "learning_rate": 2.1639344262295084e-05, + "loss": 1.4706, + "step": 45 + }, + { + "epoch": 0.12461059190031153, + "grad_norm": 0.559618353843689, + "learning_rate": 2.4098360655737705e-05, + "loss": 1.4541, + "step": 50 + }, + { + "epoch": 0.13707165109034267, + "grad_norm": 0.5646073222160339, + "learning_rate": 2.6557377049180327e-05, + "loss": 1.4961, + "step": 55 + }, + { + "epoch": 0.14953271028037382, + "grad_norm": 0.5784326195716858, + "learning_rate": 2.901639344262295e-05, + "loss": 1.4415, + "step": 60 + }, + { + "epoch": 0.16199376947040497, + "grad_norm": 0.6026971340179443, + "learning_rate": 2.9999491852149543e-05, + "loss": 1.3717, + "step": 65 + }, + { + "epoch": 0.17445482866043613, + "grad_norm": 0.618217945098877, + "learning_rate": 2.999638662885322e-05, + "loss": 1.3391, + "step": 70 + }, + { + "epoch": 0.18691588785046728, + "grad_norm": 0.6300649046897888, + "learning_rate": 2.9990459070319718e-05, + "loss": 1.346, + "step": 75 + }, + { + "epoch": 0.19937694704049844, + "grad_norm": 0.6421883702278137, + "learning_rate": 2.9981710292121587e-05, + "loss": 1.3001, + "step": 80 + }, + { + "epoch": 0.2118380062305296, + "grad_norm": 0.6744084358215332, + "learning_rate": 2.9970141940787794e-05, + "loss": 1.2791, + "step": 85 + }, + { + "epoch": 0.22429906542056074, + "grad_norm": 0.6489006280899048, + "learning_rate": 2.9955756193493843e-05, + "loss": 1.2842, + "step": 90 + }, + { + "epoch": 0.2367601246105919, + "grad_norm": 0.7138885855674744, + "learning_rate": 2.9938555757652027e-05, + "loss": 1.2632, + "step": 95 + }, + { + "epoch": 0.24922118380062305, + "grad_norm": 0.6651730537414551, + "learning_rate": 2.991854387040189e-05, + "loss": 1.2685, + "step": 100 + }, + { + "epoch": 0.2616822429906542, + "grad_norm": 0.7515709400177002, + "learning_rate": 2.9895724298000995e-05, + "loss": 1.224, + "step": 105 + }, + { + "epoch": 0.27414330218068533, + "grad_norm": 0.7440294623374939, + "learning_rate": 2.9870101335116107e-05, + "loss": 1.2032, + "step": 110 + }, + { + "epoch": 0.2866043613707165, + "grad_norm": 0.6743971109390259, + "learning_rate": 2.9841679804014938e-05, + "loss": 1.1713, + "step": 115 + }, + { + "epoch": 0.29906542056074764, + "grad_norm": 0.7800809144973755, + "learning_rate": 2.981046505365859e-05, + "loss": 1.2217, + "step": 120 + }, + { + "epoch": 0.3115264797507788, + "grad_norm": 0.7863685488700867, + "learning_rate": 2.9776462958694873e-05, + "loss": 1.1534, + "step": 125 + }, + { + "epoch": 0.32398753894080995, + "grad_norm": 0.8574065566062927, + "learning_rate": 2.9739679918352686e-05, + "loss": 1.1531, + "step": 130 + }, + { + "epoch": 0.3364485981308411, + "grad_norm": 0.8616034388542175, + "learning_rate": 2.9700122855237685e-05, + "loss": 1.1039, + "step": 135 + }, + { + "epoch": 0.34890965732087226, + "grad_norm": 0.9016385078430176, + "learning_rate": 2.965779921402944e-05, + "loss": 1.1245, + "step": 140 + }, + { + "epoch": 0.3613707165109034, + "grad_norm": 0.7953440546989441, + "learning_rate": 2.961271696008033e-05, + "loss": 1.1361, + "step": 145 + }, + { + "epoch": 0.37383177570093457, + "grad_norm": 0.8039241433143616, + "learning_rate": 2.9564884577916463e-05, + "loss": 1.0437, + "step": 150 + }, + { + "epoch": 0.3862928348909657, + "grad_norm": 0.9649161696434021, + "learning_rate": 2.951431106964088e-05, + "loss": 0.9916, + "step": 155 + }, + { + "epoch": 0.3987538940809969, + "grad_norm": 0.873070240020752, + "learning_rate": 2.9461005953239347e-05, + "loss": 1.056, + "step": 160 + }, + { + "epoch": 0.411214953271028, + "grad_norm": 1.0152827501296997, + "learning_rate": 2.9404979260789064e-05, + "loss": 0.9823, + "step": 165 + }, + { + "epoch": 0.4236760124610592, + "grad_norm": 1.5071896314620972, + "learning_rate": 2.934624153657061e-05, + "loss": 1.0862, + "step": 170 + }, + { + "epoch": 0.43613707165109034, + "grad_norm": 0.9236105680465698, + "learning_rate": 2.9284803835083507e-05, + "loss": 0.8974, + "step": 175 + }, + { + "epoch": 0.4485981308411215, + "grad_norm": 1.032192587852478, + "learning_rate": 2.9220677718965747e-05, + "loss": 0.991, + "step": 180 + }, + { + "epoch": 0.46105919003115264, + "grad_norm": 1.1815871000289917, + "learning_rate": 2.9153875256817696e-05, + "loss": 0.9504, + "step": 185 + }, + { + "epoch": 0.4735202492211838, + "grad_norm": 0.9613412618637085, + "learning_rate": 2.9084409020930767e-05, + "loss": 0.9487, + "step": 190 + }, + { + "epoch": 0.48598130841121495, + "grad_norm": 0.9879019856452942, + "learning_rate": 2.9012292084921306e-05, + "loss": 0.9161, + "step": 195 + }, + { + "epoch": 0.4984423676012461, + "grad_norm": 1.0387542247772217, + "learning_rate": 2.893753802127012e-05, + "loss": 0.9001, + "step": 200 + }, + { + "epoch": 0.5109034267912772, + "grad_norm": 1.0146880149841309, + "learning_rate": 2.8860160898768123e-05, + "loss": 0.8594, + "step": 205 + }, + { + "epoch": 0.5233644859813084, + "grad_norm": 1.187029242515564, + "learning_rate": 2.8780175279868577e-05, + "loss": 0.914, + "step": 210 + }, + { + "epoch": 0.5358255451713395, + "grad_norm": 1.093167781829834, + "learning_rate": 2.8697596217946426e-05, + "loss": 0.8643, + "step": 215 + }, + { + "epoch": 0.5482866043613707, + "grad_norm": 1.1544939279556274, + "learning_rate": 2.861243925446523e-05, + "loss": 0.8791, + "step": 220 + }, + { + "epoch": 0.5607476635514018, + "grad_norm": 1.19559645652771, + "learning_rate": 2.8524720416052243e-05, + "loss": 0.8912, + "step": 225 + }, + { + "epoch": 0.573208722741433, + "grad_norm": 1.1206765174865723, + "learning_rate": 2.84344562114822e-05, + "loss": 0.8017, + "step": 230 + }, + { + "epoch": 0.5856697819314641, + "grad_norm": 1.14994215965271, + "learning_rate": 2.8341663628570328e-05, + "loss": 0.8326, + "step": 235 + }, + { + "epoch": 0.5981308411214953, + "grad_norm": 1.1227439641952515, + "learning_rate": 2.824636013097524e-05, + "loss": 0.7321, + "step": 240 + }, + { + "epoch": 0.6105919003115264, + "grad_norm": 1.111411452293396, + "learning_rate": 2.8148563654912257e-05, + "loss": 0.7676, + "step": 245 + }, + { + "epoch": 0.6230529595015576, + "grad_norm": 1.1857693195343018, + "learning_rate": 2.8048292605777766e-05, + "loss": 0.8009, + "step": 250 + }, + { + "epoch": 0.6355140186915887, + "grad_norm": 1.1719634532928467, + "learning_rate": 2.7945565854685348e-05, + "loss": 0.7476, + "step": 255 + }, + { + "epoch": 0.6479750778816199, + "grad_norm": 1.2640599012374878, + "learning_rate": 2.7840402734914182e-05, + "loss": 0.7258, + "step": 260 + }, + { + "epoch": 0.660436137071651, + "grad_norm": 1.1105681657791138, + "learning_rate": 2.773282303827052e-05, + "loss": 0.7768, + "step": 265 + }, + { + "epoch": 0.6728971962616822, + "grad_norm": 1.1462687253952026, + "learning_rate": 2.762284701136283e-05, + "loss": 0.7233, + "step": 270 + }, + { + "epoch": 0.6853582554517134, + "grad_norm": 1.2131054401397705, + "learning_rate": 2.7510495351791397e-05, + "loss": 0.7093, + "step": 275 + }, + { + "epoch": 0.6978193146417445, + "grad_norm": 1.2255460023880005, + "learning_rate": 2.739578920425297e-05, + "loss": 0.6972, + "step": 280 + }, + { + "epoch": 0.7102803738317757, + "grad_norm": 1.6768884658813477, + "learning_rate": 2.727875015656135e-05, + "loss": 0.6666, + "step": 285 + }, + { + "epoch": 0.7227414330218068, + "grad_norm": 1.267047643661499, + "learning_rate": 2.7159400235584507e-05, + "loss": 0.6596, + "step": 290 + }, + { + "epoch": 0.735202492211838, + "grad_norm": 1.3083590269088745, + "learning_rate": 2.703776190309914e-05, + "loss": 0.6759, + "step": 295 + }, + { + "epoch": 0.7476635514018691, + "grad_norm": 1.2568644285202026, + "learning_rate": 2.691385805156329e-05, + "loss": 0.6405, + "step": 300 + }, + { + "epoch": 0.7601246105919003, + "grad_norm": 1.1297191381454468, + "learning_rate": 2.6787711999808026e-05, + "loss": 0.6884, + "step": 305 + }, + { + "epoch": 0.7725856697819314, + "grad_norm": 1.1897577047348022, + "learning_rate": 2.6659347488648763e-05, + "loss": 0.6322, + "step": 310 + }, + { + "epoch": 0.7850467289719626, + "grad_norm": 1.1402413845062256, + "learning_rate": 2.6528788676417238e-05, + "loss": 0.6887, + "step": 315 + }, + { + "epoch": 0.7975077881619937, + "grad_norm": 1.3091211318969727, + "learning_rate": 2.6396060134414883e-05, + "loss": 0.6234, + "step": 320 + }, + { + "epoch": 0.8099688473520249, + "grad_norm": 1.2470093965530396, + "learning_rate": 2.6261186842288482e-05, + "loss": 0.6196, + "step": 325 + }, + { + "epoch": 0.822429906542056, + "grad_norm": 1.3138375282287598, + "learning_rate": 2.6124194183328992e-05, + "loss": 0.599, + "step": 330 + }, + { + "epoch": 0.8348909657320872, + "grad_norm": 1.2084360122680664, + "learning_rate": 2.5985107939694346e-05, + "loss": 0.6272, + "step": 335 + }, + { + "epoch": 0.8473520249221184, + "grad_norm": 1.2571656703948975, + "learning_rate": 2.5843954287557253e-05, + "loss": 0.5779, + "step": 340 + }, + { + "epoch": 0.8598130841121495, + "grad_norm": 1.5297287702560425, + "learning_rate": 2.5700759792178813e-05, + "loss": 0.5528, + "step": 345 + }, + { + "epoch": 0.8722741433021807, + "grad_norm": 1.2811731100082397, + "learning_rate": 2.5555551402908896e-05, + "loss": 0.5946, + "step": 350 + }, + { + "epoch": 0.8847352024922118, + "grad_norm": 1.7837169170379639, + "learning_rate": 2.5408356448114255e-05, + "loss": 0.5477, + "step": 355 + }, + { + "epoch": 0.897196261682243, + "grad_norm": 1.4558868408203125, + "learning_rate": 2.5259202630035296e-05, + "loss": 0.6383, + "step": 360 + }, + { + "epoch": 0.9096573208722741, + "grad_norm": 1.2999902963638306, + "learning_rate": 2.51081180195725e-05, + "loss": 0.5448, + "step": 365 + }, + { + "epoch": 0.9221183800623053, + "grad_norm": 1.2640613317489624, + "learning_rate": 2.4955131051003427e-05, + "loss": 0.5533, + "step": 370 + }, + { + "epoch": 0.9345794392523364, + "grad_norm": 1.3336979150772095, + "learning_rate": 2.4800270516631376e-05, + "loss": 0.5845, + "step": 375 + }, + { + "epoch": 0.9470404984423676, + "grad_norm": 1.3800572156906128, + "learning_rate": 2.4643565561366644e-05, + "loss": 0.4931, + "step": 380 + }, + { + "epoch": 0.9595015576323987, + "grad_norm": 1.3788105249404907, + "learning_rate": 2.4485045677241415e-05, + "loss": 0.5723, + "step": 385 + }, + { + "epoch": 0.9719626168224299, + "grad_norm": 1.7141433954238892, + "learning_rate": 2.4324740697859326e-05, + "loss": 0.5232, + "step": 390 + }, + { + "epoch": 0.9844236760124611, + "grad_norm": 1.282641887664795, + "learning_rate": 2.4162680792780775e-05, + "loss": 0.5365, + "step": 395 + }, + { + "epoch": 0.9968847352024922, + "grad_norm": 1.2487872838974, + "learning_rate": 2.399889646184494e-05, + "loss": 0.5339, + "step": 400 + }, + { + "epoch": 1.0074766355140188, + "grad_norm": 1.3565014600753784, + "learning_rate": 2.3833418529429728e-05, + "loss": 0.4845, + "step": 405 + }, + { + "epoch": 1.01993769470405, + "grad_norm": 1.2898744344711304, + "learning_rate": 2.366627813865055e-05, + "loss": 0.4468, + "step": 410 + }, + { + "epoch": 1.032398753894081, + "grad_norm": 1.3483545780181885, + "learning_rate": 2.349750674549918e-05, + "loss": 0.4051, + "step": 415 + }, + { + "epoch": 1.0448598130841122, + "grad_norm": 1.3324998617172241, + "learning_rate": 2.332713611292371e-05, + "loss": 0.4889, + "step": 420 + }, + { + "epoch": 1.0573208722741434, + "grad_norm": 1.804293155670166, + "learning_rate": 2.3155198304850694e-05, + "loss": 0.4221, + "step": 425 + }, + { + "epoch": 1.0697819314641746, + "grad_norm": 1.2247971296310425, + "learning_rate": 2.2981725680150745e-05, + "loss": 0.4198, + "step": 430 + }, + { + "epoch": 1.0822429906542057, + "grad_norm": 1.218647837638855, + "learning_rate": 2.2806750886548508e-05, + "loss": 0.444, + "step": 435 + }, + { + "epoch": 1.0947040498442369, + "grad_norm": 1.4913464784622192, + "learning_rate": 2.2630306854478335e-05, + "loss": 0.4431, + "step": 440 + }, + { + "epoch": 1.107165109034268, + "grad_norm": 1.230381727218628, + "learning_rate": 2.245242679088679e-05, + "loss": 0.3749, + "step": 445 + }, + { + "epoch": 1.1196261682242992, + "grad_norm": 1.3462220430374146, + "learning_rate": 2.2273144172982985e-05, + "loss": 0.3846, + "step": 450 + }, + { + "epoch": 1.1320872274143303, + "grad_norm": 1.2514972686767578, + "learning_rate": 2.2092492741938222e-05, + "loss": 0.4447, + "step": 455 + }, + { + "epoch": 1.1445482866043615, + "grad_norm": 1.5152968168258667, + "learning_rate": 2.1910506496535816e-05, + "loss": 0.4076, + "step": 460 + }, + { + "epoch": 1.1570093457943926, + "grad_norm": 1.348156452178955, + "learning_rate": 2.1727219686772494e-05, + "loss": 0.4279, + "step": 465 + }, + { + "epoch": 1.1694704049844238, + "grad_norm": 1.3284410238265991, + "learning_rate": 2.154266680741253e-05, + "loss": 0.3732, + "step": 470 + }, + { + "epoch": 1.181931464174455, + "grad_norm": 1.2210983037948608, + "learning_rate": 2.1356882591495795e-05, + "loss": 0.3425, + "step": 475 + }, + { + "epoch": 1.194392523364486, + "grad_norm": 1.6162492036819458, + "learning_rate": 2.116990200380093e-05, + "loss": 0.3764, + "step": 480 + }, + { + "epoch": 1.2068535825545172, + "grad_norm": 1.367363691329956, + "learning_rate": 2.0981760234264983e-05, + "loss": 0.3718, + "step": 485 + }, + { + "epoch": 1.2193146417445484, + "grad_norm": 1.3457995653152466, + "learning_rate": 2.07924926913606e-05, + "loss": 0.3426, + "step": 490 + }, + { + "epoch": 1.2317757009345796, + "grad_norm": 1.2486886978149414, + "learning_rate": 2.0602134995432124e-05, + "loss": 0.33, + "step": 495 + }, + { + "epoch": 1.2442367601246107, + "grad_norm": 1.3996330499649048, + "learning_rate": 2.0410722971991802e-05, + "loss": 0.3275, + "step": 500 + }, + { + "epoch": 1.2566978193146419, + "grad_norm": 1.6613109111785889, + "learning_rate": 2.0218292644977396e-05, + "loss": 0.3501, + "step": 505 + }, + { + "epoch": 1.269158878504673, + "grad_norm": 1.1724131107330322, + "learning_rate": 2.002488022997244e-05, + "loss": 0.3978, + "step": 510 + }, + { + "epoch": 1.2816199376947042, + "grad_norm": 1.31151282787323, + "learning_rate": 1.9830522127390428e-05, + "loss": 0.3484, + "step": 515 + }, + { + "epoch": 1.2940809968847353, + "grad_norm": 1.3785041570663452, + "learning_rate": 1.963525491562421e-05, + "loss": 0.3247, + "step": 520 + }, + { + "epoch": 1.3065420560747665, + "grad_norm": 1.3729525804519653, + "learning_rate": 1.943911534416193e-05, + "loss": 0.3406, + "step": 525 + }, + { + "epoch": 1.3190031152647976, + "grad_norm": 1.4904634952545166, + "learning_rate": 1.924214032667069e-05, + "loss": 0.3399, + "step": 530 + }, + { + "epoch": 1.3314641744548288, + "grad_norm": 1.276444673538208, + "learning_rate": 1.9044366934049408e-05, + "loss": 0.3462, + "step": 535 + }, + { + "epoch": 1.34392523364486, + "grad_norm": 1.442848801612854, + "learning_rate": 1.8845832387451995e-05, + "loss": 0.3142, + "step": 540 + }, + { + "epoch": 1.356386292834891, + "grad_norm": 1.3109973669052124, + "learning_rate": 1.8646574051282337e-05, + "loss": 0.3454, + "step": 545 + }, + { + "epoch": 1.3688473520249222, + "grad_norm": 1.5475910902023315, + "learning_rate": 1.844662942616224e-05, + "loss": 0.3169, + "step": 550 + }, + { + "epoch": 1.3813084112149534, + "grad_norm": 1.2680741548538208, + "learning_rate": 1.8246036141873786e-05, + "loss": 0.3229, + "step": 555 + }, + { + "epoch": 1.3937694704049846, + "grad_norm": 1.2365118265151978, + "learning_rate": 1.804483195027739e-05, + "loss": 0.2759, + "step": 560 + }, + { + "epoch": 1.4062305295950157, + "grad_norm": 1.4727873802185059, + "learning_rate": 1.7843054718206818e-05, + "loss": 0.2774, + "step": 565 + }, + { + "epoch": 1.4186915887850469, + "grad_norm": 1.200822114944458, + "learning_rate": 1.7640742420342672e-05, + "loss": 0.3114, + "step": 570 + }, + { + "epoch": 1.431152647975078, + "grad_norm": 1.3123586177825928, + "learning_rate": 1.7437933132065452e-05, + "loss": 0.2649, + "step": 575 + }, + { + "epoch": 1.4436137071651092, + "grad_norm": 1.3206313848495483, + "learning_rate": 1.7234665022289777e-05, + "loss": 0.2843, + "step": 580 + }, + { + "epoch": 1.45607476635514, + "grad_norm": 1.4776965379714966, + "learning_rate": 1.7030976346280924e-05, + "loss": 0.2836, + "step": 585 + }, + { + "epoch": 1.4685358255451713, + "grad_norm": 1.314396619796753, + "learning_rate": 1.6826905438455174e-05, + "loss": 0.3027, + "step": 590 + }, + { + "epoch": 1.4809968847352024, + "grad_norm": 1.4666306972503662, + "learning_rate": 1.662249070516523e-05, + "loss": 0.2738, + "step": 595 + }, + { + "epoch": 1.4934579439252336, + "grad_norm": 1.3325856924057007, + "learning_rate": 1.641777061747209e-05, + "loss": 0.2843, + "step": 600 + }, + { + "epoch": 1.505919003115265, + "grad_norm": 1.3460628986358643, + "learning_rate": 1.621278370390476e-05, + "loss": 0.2605, + "step": 605 + }, + { + "epoch": 1.518380062305296, + "grad_norm": 1.5369651317596436, + "learning_rate": 1.6007568543209153e-05, + "loss": 0.2906, + "step": 610 + }, + { + "epoch": 1.5308411214953273, + "grad_norm": 1.5562386512756348, + "learning_rate": 1.5802163757087513e-05, + "loss": 0.2698, + "step": 615 + }, + { + "epoch": 1.5433021806853584, + "grad_norm": 1.3391906023025513, + "learning_rate": 1.5596608002929793e-05, + "loss": 0.284, + "step": 620 + }, + { + "epoch": 1.5557632398753896, + "grad_norm": 1.4709603786468506, + "learning_rate": 1.539093996653829e-05, + "loss": 0.2631, + "step": 625 + }, + { + "epoch": 1.5682242990654207, + "grad_norm": 1.1594626903533936, + "learning_rate": 1.518519835484691e-05, + "loss": 0.2415, + "step": 630 + }, + { + "epoch": 1.5806853582554519, + "grad_norm": 1.1949822902679443, + "learning_rate": 1.4979421888636532e-05, + "loss": 0.2319, + "step": 635 + }, + { + "epoch": 1.593146417445483, + "grad_norm": 1.2472540140151978, + "learning_rate": 1.4773649295247668e-05, + "loss": 0.2679, + "step": 640 + }, + { + "epoch": 1.6056074766355142, + "grad_norm": 1.1207035779953003, + "learning_rate": 1.4567919301291976e-05, + "loss": 0.242, + "step": 645 + }, + { + "epoch": 1.6180685358255453, + "grad_norm": 1.4375412464141846, + "learning_rate": 1.4362270625363852e-05, + "loss": 0.2934, + "step": 650 + }, + { + "epoch": 1.6305295950155765, + "grad_norm": 1.4880056381225586, + "learning_rate": 1.415674197075355e-05, + "loss": 0.2717, + "step": 655 + }, + { + "epoch": 1.6429906542056076, + "grad_norm": 1.31331467628479, + "learning_rate": 1.3951372018163197e-05, + "loss": 0.2625, + "step": 660 + }, + { + "epoch": 1.6554517133956388, + "grad_norm": 1.2165220975875854, + "learning_rate": 1.3746199418427044e-05, + "loss": 0.232, + "step": 665 + }, + { + "epoch": 1.66791277258567, + "grad_norm": 1.3418197631835938, + "learning_rate": 1.3541262785237321e-05, + "loss": 0.2427, + "step": 670 + }, + { + "epoch": 1.680373831775701, + "grad_norm": 1.1663246154785156, + "learning_rate": 1.3336600687877124e-05, + "loss": 0.2192, + "step": 675 + }, + { + "epoch": 1.6928348909657323, + "grad_norm": 1.4799425601959229, + "learning_rate": 1.313225164396162e-05, + "loss": 0.2342, + "step": 680 + }, + { + "epoch": 1.7052959501557632, + "grad_norm": 1.4028592109680176, + "learning_rate": 1.2928254112189e-05, + "loss": 0.2384, + "step": 685 + }, + { + "epoch": 1.7177570093457943, + "grad_norm": 1.3292847871780396, + "learning_rate": 1.272464648510251e-05, + "loss": 0.1946, + "step": 690 + }, + { + "epoch": 1.7302180685358255, + "grad_norm": 1.3564809560775757, + "learning_rate": 1.2521467081864945e-05, + "loss": 0.2152, + "step": 695 + }, + { + "epoch": 1.7426791277258566, + "grad_norm": 1.203546404838562, + "learning_rate": 1.2318754141046936e-05, + "loss": 0.2199, + "step": 700 + }, + { + "epoch": 1.7551401869158878, + "grad_norm": 1.3742973804473877, + "learning_rate": 1.211654581343039e-05, + "loss": 0.2261, + "step": 705 + }, + { + "epoch": 1.767601246105919, + "grad_norm": 1.3650232553482056, + "learning_rate": 1.1914880154828514e-05, + "loss": 0.2309, + "step": 710 + }, + { + "epoch": 1.78006230529595, + "grad_norm": 1.2802412509918213, + "learning_rate": 1.1713795118923659e-05, + "loss": 0.2355, + "step": 715 + }, + { + "epoch": 1.7925233644859813, + "grad_norm": 1.329018473625183, + "learning_rate": 1.1513328550124379e-05, + "loss": 0.217, + "step": 720 + }, + { + "epoch": 1.8049844236760124, + "grad_norm": 1.9239187240600586, + "learning_rate": 1.1313518176443099e-05, + "loss": 0.2142, + "step": 725 + }, + { + "epoch": 1.8174454828660436, + "grad_norm": 1.3798376321792603, + "learning_rate": 1.1114401602395647e-05, + "loss": 0.1698, + "step": 730 + }, + { + "epoch": 1.8299065420560747, + "grad_norm": 1.430258870124817, + "learning_rate": 1.0916016301924056e-05, + "loss": 0.218, + "step": 735 + }, + { + "epoch": 1.8423676012461059, + "grad_norm": 1.1668757200241089, + "learning_rate": 1.071839961134393e-05, + "loss": 0.1919, + "step": 740 + }, + { + "epoch": 1.854828660436137, + "grad_norm": 1.4789959192276, + "learning_rate": 1.0521588722317707e-05, + "loss": 0.1756, + "step": 745 + }, + { + "epoch": 1.8672897196261682, + "grad_norm": 1.2754652500152588, + "learning_rate": 1.0325620674855147e-05, + "loss": 0.1825, + "step": 750 + }, + { + "epoch": 1.8797507788161993, + "grad_norm": 1.1667087078094482, + "learning_rate": 1.0130532350342381e-05, + "loss": 0.1832, + "step": 755 + }, + { + "epoch": 1.8922118380062305, + "grad_norm": 1.2377445697784424, + "learning_rate": 9.936360464600769e-06, + "loss": 0.1819, + "step": 760 + }, + { + "epoch": 1.9046728971962616, + "grad_norm": 1.1091711521148682, + "learning_rate": 9.74314156097697e-06, + "loss": 0.1923, + "step": 765 + }, + { + "epoch": 1.9171339563862928, + "grad_norm": 1.2358664274215698, + "learning_rate": 9.550912003465442e-06, + "loss": 0.1853, + "step": 770 + }, + { + "epoch": 1.929595015576324, + "grad_norm": 1.1740466356277466, + "learning_rate": 9.359707969864688e-06, + "loss": 0.1582, + "step": 775 + }, + { + "epoch": 1.9420560747663551, + "grad_norm": 1.4427814483642578, + "learning_rate": 9.16956544496857e-06, + "loss": 0.1746, + "step": 780 + }, + { + "epoch": 1.9545171339563863, + "grad_norm": 1.382459282875061, + "learning_rate": 8.980520213793934e-06, + "loss": 0.1652, + "step": 785 + }, + { + "epoch": 1.9669781931464174, + "grad_norm": 1.261889934539795, + "learning_rate": 8.792607854845829e-06, + "loss": 0.1682, + "step": 790 + }, + { + "epoch": 1.9794392523364486, + "grad_norm": 1.274738073348999, + "learning_rate": 8.605863733421594e-06, + "loss": 0.1755, + "step": 795 + }, + { + "epoch": 1.9919003115264797, + "grad_norm": 1.2673702239990234, + "learning_rate": 8.420322994955074e-06, + "loss": 0.1808, + "step": 800 + }, + { + "epoch": 2.0024922118380064, + "grad_norm": 1.131147027015686, + "learning_rate": 8.236020558402222e-06, + "loss": 0.1635, + "step": 805 + }, + { + "epoch": 2.0149532710280376, + "grad_norm": 1.354861855506897, + "learning_rate": 8.052991109669306e-06, + "loss": 0.1374, + "step": 810 + }, + { + "epoch": 2.0274143302180687, + "grad_norm": 1.3032562732696533, + "learning_rate": 7.87126909508499e-06, + "loss": 0.135, + "step": 815 + }, + { + "epoch": 2.0398753894081, + "grad_norm": 1.0821024179458618, + "learning_rate": 7.690888714917507e-06, + "loss": 0.1436, + "step": 820 + }, + { + "epoch": 2.052336448598131, + "grad_norm": 1.127859115600586, + "learning_rate": 7.511883916938109e-06, + "loss": 0.1367, + "step": 825 + }, + { + "epoch": 2.064797507788162, + "grad_norm": 1.4026978015899658, + "learning_rate": 7.334288390032098e-06, + "loss": 0.1271, + "step": 830 + }, + { + "epoch": 2.0772585669781933, + "grad_norm": 1.2083226442337036, + "learning_rate": 7.158135557858515e-06, + "loss": 0.1287, + "step": 835 + }, + { + "epoch": 2.0897196261682245, + "grad_norm": 2.912933588027954, + "learning_rate": 6.983458572559782e-06, + "loss": 0.1313, + "step": 840 + }, + { + "epoch": 2.1021806853582556, + "grad_norm": 0.9981629848480225, + "learning_rate": 6.81029030852244e-06, + "loss": 0.1369, + "step": 845 + }, + { + "epoch": 2.114641744548287, + "grad_norm": 1.1591410636901855, + "learning_rate": 6.63866335619015e-06, + "loss": 0.1258, + "step": 850 + }, + { + "epoch": 2.127102803738318, + "grad_norm": 1.3078807592391968, + "learning_rate": 6.468610015930143e-06, + "loss": 0.1175, + "step": 855 + }, + { + "epoch": 2.139563862928349, + "grad_norm": 1.1207478046417236, + "learning_rate": 6.3001622919542495e-06, + "loss": 0.118, + "step": 860 + }, + { + "epoch": 2.1520249221183803, + "grad_norm": 1.180823802947998, + "learning_rate": 6.133351886295691e-06, + "loss": 0.1141, + "step": 865 + }, + { + "epoch": 2.1644859813084114, + "grad_norm": 1.0722765922546387, + "learning_rate": 5.9682101928426966e-06, + "loss": 0.1362, + "step": 870 + }, + { + "epoch": 2.1769470404984426, + "grad_norm": 0.9923248887062073, + "learning_rate": 5.804768291430174e-06, + "loss": 0.1244, + "step": 875 + }, + { + "epoch": 2.1894080996884737, + "grad_norm": 1.104004144668579, + "learning_rate": 5.643056941990433e-06, + "loss": 0.1266, + "step": 880 + }, + { + "epoch": 2.201869158878505, + "grad_norm": 1.0187407732009888, + "learning_rate": 5.483106578764136e-06, + "loss": 0.1064, + "step": 885 + }, + { + "epoch": 2.214330218068536, + "grad_norm": 1.1444305181503296, + "learning_rate": 5.324947304572553e-06, + "loss": 0.1221, + "step": 890 + }, + { + "epoch": 2.226791277258567, + "grad_norm": 1.1207917928695679, + "learning_rate": 5.1686088851521685e-06, + "loss": 0.1209, + "step": 895 + }, + { + "epoch": 2.2392523364485983, + "grad_norm": 1.5274792909622192, + "learning_rate": 5.014120743552749e-06, + "loss": 0.122, + "step": 900 + }, + { + "epoch": 2.251713395638629, + "grad_norm": 1.05408775806427, + "learning_rate": 4.861511954599883e-06, + "loss": 0.1147, + "step": 905 + }, + { + "epoch": 2.2641744548286606, + "grad_norm": 1.1335138082504272, + "learning_rate": 4.710811239423083e-06, + "loss": 0.112, + "step": 910 + }, + { + "epoch": 2.2766355140186914, + "grad_norm": 1.0894725322723389, + "learning_rate": 4.5620469600504355e-06, + "loss": 0.1173, + "step": 915 + }, + { + "epoch": 2.289096573208723, + "grad_norm": 1.2253080606460571, + "learning_rate": 4.415247114070834e-06, + "loss": 0.1255, + "step": 920 + }, + { + "epoch": 2.3015576323987537, + "grad_norm": 1.0113202333450317, + "learning_rate": 4.270439329364799e-06, + "loss": 0.1138, + "step": 925 + }, + { + "epoch": 2.3140186915887853, + "grad_norm": 1.131226658821106, + "learning_rate": 4.1276508589048986e-06, + "loss": 0.1283, + "step": 930 + }, + { + "epoch": 2.326479750778816, + "grad_norm": 0.9833255410194397, + "learning_rate": 3.986908575626699e-06, + "loss": 0.1129, + "step": 935 + }, + { + "epoch": 2.3389408099688476, + "grad_norm": 1.055303692817688, + "learning_rate": 3.848238967371265e-06, + "loss": 0.1087, + "step": 940 + }, + { + "epoch": 2.3514018691588783, + "grad_norm": 1.2388861179351807, + "learning_rate": 3.7116681319001018e-06, + "loss": 0.1151, + "step": 945 + }, + { + "epoch": 2.36386292834891, + "grad_norm": 0.977337658405304, + "learning_rate": 3.5772217719835384e-06, + "loss": 0.1073, + "step": 950 + }, + { + "epoch": 2.3763239875389406, + "grad_norm": 0.9292895793914795, + "learning_rate": 3.444925190563445e-06, + "loss": 0.1059, + "step": 955 + }, + { + "epoch": 2.388785046728972, + "grad_norm": 0.8068581819534302, + "learning_rate": 3.3148032859911844e-06, + "loss": 0.0881, + "step": 960 + }, + { + "epoch": 2.401246105919003, + "grad_norm": 0.8657582402229309, + "learning_rate": 3.186880547341727e-06, + "loss": 0.1056, + "step": 965 + }, + { + "epoch": 2.4137071651090345, + "grad_norm": 0.7917605638504028, + "learning_rate": 3.0611810498047742e-06, + "loss": 0.0976, + "step": 970 + }, + { + "epoch": 2.426168224299065, + "grad_norm": 1.0184714794158936, + "learning_rate": 2.937728450153789e-06, + "loss": 0.1088, + "step": 975 + }, + { + "epoch": 2.438629283489097, + "grad_norm": 0.933066189289093, + "learning_rate": 2.816545982293752e-06, + "loss": 0.108, + "step": 980 + }, + { + "epoch": 2.4510903426791275, + "grad_norm": 1.0022294521331787, + "learning_rate": 2.6976564528885422e-06, + "loss": 0.1106, + "step": 985 + }, + { + "epoch": 2.463551401869159, + "grad_norm": 0.8450770378112793, + "learning_rate": 2.5810822370686804e-06, + "loss": 0.0872, + "step": 990 + }, + { + "epoch": 2.47601246105919, + "grad_norm": 1.0702710151672363, + "learning_rate": 2.466845274220316e-06, + "loss": 0.0992, + "step": 995 + }, + { + "epoch": 2.4884735202492214, + "grad_norm": 0.7924549579620361, + "learning_rate": 2.3549670638562016e-06, + "loss": 0.0986, + "step": 1000 + }, + { + "epoch": 2.500934579439252, + "grad_norm": 0.8309375047683716, + "learning_rate": 2.2454686615694785e-06, + "loss": 0.1135, + "step": 1005 + }, + { + "epoch": 2.5133956386292837, + "grad_norm": 0.9416615962982178, + "learning_rate": 2.138370675070977e-06, + "loss": 0.1148, + "step": 1010 + }, + { + "epoch": 2.5258566978193144, + "grad_norm": 0.9909353852272034, + "learning_rate": 2.0336932603108355e-06, + "loss": 0.0925, + "step": 1015 + }, + { + "epoch": 2.538317757009346, + "grad_norm": 0.9023491144180298, + "learning_rate": 1.9314561176851235e-06, + "loss": 0.1151, + "step": 1020 + }, + { + "epoch": 2.5507788161993767, + "grad_norm": 0.9924950003623962, + "learning_rate": 1.8316784883282105e-06, + "loss": 0.0878, + "step": 1025 + }, + { + "epoch": 2.5632398753894083, + "grad_norm": 0.8081493377685547, + "learning_rate": 1.7343791504915684e-06, + "loss": 0.106, + "step": 1030 + }, + { + "epoch": 2.575700934579439, + "grad_norm": 0.7538472414016724, + "learning_rate": 1.6395764160096678e-06, + "loss": 0.1075, + "step": 1035 + }, + { + "epoch": 2.5881619937694706, + "grad_norm": 0.9873364567756653, + "learning_rate": 1.547288126853697e-06, + "loss": 0.1021, + "step": 1040 + }, + { + "epoch": 2.6006230529595014, + "grad_norm": 0.8629933595657349, + "learning_rate": 1.4575316517736714e-06, + "loss": 0.1232, + "step": 1045 + }, + { + "epoch": 2.613084112149533, + "grad_norm": 1.1020119190216064, + "learning_rate": 1.370323883029615e-06, + "loss": 0.1239, + "step": 1050 + }, + { + "epoch": 2.6255451713395637, + "grad_norm": 0.8503817915916443, + "learning_rate": 1.2856812332124274e-06, + "loss": 0.0913, + "step": 1055 + }, + { + "epoch": 2.6380062305295953, + "grad_norm": 0.8368441462516785, + "learning_rate": 1.2036196321550096e-06, + "loss": 0.1039, + "step": 1060 + }, + { + "epoch": 2.650467289719626, + "grad_norm": 0.7688242793083191, + "learning_rate": 1.1241545239342609e-06, + "loss": 0.1005, + "step": 1065 + }, + { + "epoch": 2.6629283489096576, + "grad_norm": 0.9436780214309692, + "learning_rate": 1.0473008639644814e-06, + "loss": 0.1102, + "step": 1070 + }, + { + "epoch": 2.6753894080996883, + "grad_norm": 0.9523565173149109, + "learning_rate": 9.730731161827528e-07, + "loss": 0.108, + "step": 1075 + }, + { + "epoch": 2.68785046728972, + "grad_norm": 0.7680991291999817, + "learning_rate": 9.014852503268045e-07, + "loss": 0.0858, + "step": 1080 + }, + { + "epoch": 2.7003115264797506, + "grad_norm": 0.6620835661888123, + "learning_rate": 8.325507393059101e-07, + "loss": 0.1009, + "step": 1085 + }, + { + "epoch": 2.712772585669782, + "grad_norm": 0.7275515794754028, + "learning_rate": 7.662825566652442e-07, + "loss": 0.0936, + "step": 1090 + }, + { + "epoch": 2.725233644859813, + "grad_norm": 0.7301705479621887, + "learning_rate": 7.026931741442783e-07, + "loss": 0.0992, + "step": 1095 + }, + { + "epoch": 2.7376947040498445, + "grad_norm": 0.8045859336853027, + "learning_rate": 6.417945593295638e-07, + "loss": 0.0844, + "step": 1100 + }, + { + "epoch": 2.750155763239875, + "grad_norm": 0.7369112968444824, + "learning_rate": 5.835981734024348e-07, + "loss": 0.0916, + "step": 1105 + }, + { + "epoch": 2.762616822429907, + "grad_norm": 0.7877091765403748, + "learning_rate": 5.281149689819981e-07, + "loss": 0.0996, + "step": 1110 + }, + { + "epoch": 2.7750778816199375, + "grad_norm": 0.8165203332901001, + "learning_rate": 4.7535538806383006e-07, + "loss": 0.0942, + "step": 1115 + }, + { + "epoch": 2.787538940809969, + "grad_norm": 0.9592405557632446, + "learning_rate": 4.2532936005479585e-07, + "loss": 0.0975, + "step": 1120 + }, + { + "epoch": 2.8, + "grad_norm": 0.7340384721755981, + "learning_rate": 3.7804629990431884e-07, + "loss": 0.0858, + "step": 1125 + }, + { + "epoch": 2.8124610591900314, + "grad_norm": 0.7193246483802795, + "learning_rate": 3.335151063324765e-07, + "loss": 0.0934, + "step": 1130 + }, + { + "epoch": 2.824922118380062, + "grad_norm": 0.8057674765586853, + "learning_rate": 2.917441601552534e-07, + "loss": 0.0883, + "step": 1135 + }, + { + "epoch": 2.8373831775700937, + "grad_norm": 0.7146017551422119, + "learning_rate": 2.527413227072628e-07, + "loss": 0.0836, + "step": 1140 + }, + { + "epoch": 2.8498442367601244, + "grad_norm": 0.8160943984985352, + "learning_rate": 2.165139343622352e-07, + "loss": 0.0959, + "step": 1145 + }, + { + "epoch": 2.862305295950156, + "grad_norm": 0.7842468619346619, + "learning_rate": 1.830688131515551e-07, + "loss": 0.0915, + "step": 1150 + }, + { + "epoch": 2.8747663551401867, + "grad_norm": 0.8558725118637085, + "learning_rate": 1.5241225348109898e-07, + "loss": 0.1047, + "step": 1155 + }, + { + "epoch": 2.8872274143302183, + "grad_norm": 0.7245238423347473, + "learning_rate": 1.2455002494661972e-07, + "loss": 0.1023, + "step": 1160 + }, + { + "epoch": 2.899688473520249, + "grad_norm": 0.7568594217300415, + "learning_rate": 9.948737124790331e-08, + "loss": 0.0828, + "step": 1165 + }, + { + "epoch": 2.91214953271028, + "grad_norm": 0.9535264372825623, + "learning_rate": 7.722900920190179e-08, + "loss": 0.0983, + "step": 1170 + }, + { + "epoch": 2.9246105919003114, + "grad_norm": 0.8446788191795349, + "learning_rate": 5.777912785502493e-08, + "loss": 0.1031, + "step": 1175 + }, + { + "epoch": 2.9370716510903425, + "grad_norm": 0.8228189945220947, + "learning_rate": 4.114138769474918e-08, + "loss": 0.1084, + "step": 1180 + }, + { + "epoch": 2.9495327102803737, + "grad_norm": 0.7263519167900085, + "learning_rate": 2.731891996071878e-08, + "loss": 0.0917, + "step": 1185 + }, + { + "epoch": 2.961993769470405, + "grad_norm": 0.8366661071777344, + "learning_rate": 1.6314326055440475e-08, + "loss": 0.0973, + "step": 1190 + }, + { + "epoch": 2.974454828660436, + "grad_norm": 0.7568303346633911, + "learning_rate": 8.129677054693474e-09, + "loss": 0.0807, + "step": 1195 + }, + { + "epoch": 2.986915887850467, + "grad_norm": 0.6661731004714966, + "learning_rate": 2.7665133177545708e-09, + "loss": 0.0835, + "step": 1200 + }, + { + "epoch": 2.9993769470404983, + "grad_norm": 0.8253821730613708, + "learning_rate": 2.2584419750504293e-10, + "loss": 0.0999, + "step": 1205 + }, + { + "epoch": 3.0, + "step": 1206, + "total_flos": 1.5590581282747187e+18, + "train_loss": 0.4520561478994972, + "train_runtime": 1232.4116, + "train_samples_per_second": 31.253, + "train_steps_per_second": 0.979 + } + ], + "logging_steps": 5, + "max_steps": 1206, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5590581282747187e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..910dec684c0b499600aa61fe38237631c271dd27 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/6_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb513aa014f16a8822abae19f871f5156911904ad29524831cdf87b9a414f53 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6e9ebccee10484dbcd5f156277ece39ee91cf105 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 7_128_e3_3e-5 + results: [] +--- + + + +# 7_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a84f325b2de9620a271d1ff29cb3ffcbb0195b9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "up_proj", + "gate_proj", + "q_proj", + "k_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d19926bcf027819dee76c3fa383ae841f48d31f --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e322dc8a5868b6c59fef5af43f4c6b105ec36b8c2cf76a469d9f17a570284f0 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c867a9120194d57cfef162f0b38f3fd2257df10 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.6618542086655836e+18, + "train_loss": 0.44569373376335236, + "train_runtime": 1314.7893, + "train_samples": 14097, + "train_samples_per_second": 32.166, + "train_steps_per_second": 1.006 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2c867a9120194d57cfef162f0b38f3fd2257df10 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.6618542086655836e+18, + "train_loss": 0.44569373376335236, + "train_runtime": 1314.7893, + "train_samples": 14097, + "train_samples_per_second": 32.166, + "train_steps_per_second": 1.006 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..66f8f6b1d09212ff8a2088a1cd1de1846c879876 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1891 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1323, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011344299489506523, + "grad_norm": 0.6247544288635254, + "learning_rate": 1.791044776119403e-06, + "loss": 1.5873, + "step": 5 + }, + { + "epoch": 0.022688598979013045, + "grad_norm": 0.6385117173194885, + "learning_rate": 4.029850746268657e-06, + "loss": 1.5365, + "step": 10 + }, + { + "epoch": 0.03403289846851957, + "grad_norm": 0.5840207934379578, + "learning_rate": 6.268656716417911e-06, + "loss": 1.5693, + "step": 15 + }, + { + "epoch": 0.04537719795802609, + "grad_norm": 0.5426321029663086, + "learning_rate": 8.507462686567164e-06, + "loss": 1.518, + "step": 20 + }, + { + "epoch": 0.05672149744753262, + "grad_norm": 0.5238271355628967, + "learning_rate": 1.0746268656716418e-05, + "loss": 1.5352, + "step": 25 + }, + { + "epoch": 0.06806579693703914, + "grad_norm": 0.5286252498626709, + "learning_rate": 1.2985074626865672e-05, + "loss": 1.5208, + "step": 30 + }, + { + "epoch": 0.07941009642654566, + "grad_norm": 0.47477537393569946, + "learning_rate": 1.5223880597014927e-05, + "loss": 1.4859, + "step": 35 + }, + { + "epoch": 0.09075439591605218, + "grad_norm": 0.5288901925086975, + "learning_rate": 1.746268656716418e-05, + "loss": 1.4994, + "step": 40 + }, + { + "epoch": 0.1020986954055587, + "grad_norm": 0.5184392333030701, + "learning_rate": 1.9701492537313435e-05, + "loss": 1.4781, + "step": 45 + }, + { + "epoch": 0.11344299489506524, + "grad_norm": 0.4878067076206207, + "learning_rate": 2.194029850746269e-05, + "loss": 1.4423, + "step": 50 + }, + { + "epoch": 0.12478729438457176, + "grad_norm": 0.5208298563957214, + "learning_rate": 2.417910447761194e-05, + "loss": 1.4813, + "step": 55 + }, + { + "epoch": 0.13613159387407828, + "grad_norm": 0.5356929302215576, + "learning_rate": 2.6417910447761193e-05, + "loss": 1.4474, + "step": 60 + }, + { + "epoch": 0.1474758933635848, + "grad_norm": 0.5309861302375793, + "learning_rate": 2.8656716417910447e-05, + "loss": 1.3925, + "step": 65 + }, + { + "epoch": 0.15882019285309132, + "grad_norm": 0.5708047747612, + "learning_rate": 2.9999812310137735e-05, + "loss": 1.4325, + "step": 70 + }, + { + "epoch": 0.17016449234259784, + "grad_norm": 0.5696940422058105, + "learning_rate": 2.9997700853128978e-05, + "loss": 1.3478, + "step": 75 + }, + { + "epoch": 0.18150879183210436, + "grad_norm": 0.5587894320487976, + "learning_rate": 2.99932436581301e-05, + "loss": 1.2803, + "step": 80 + }, + { + "epoch": 0.19285309132161088, + "grad_norm": 0.6514503359794617, + "learning_rate": 2.9986441422275408e-05, + "loss": 1.2576, + "step": 85 + }, + { + "epoch": 0.2041973908111174, + "grad_norm": 0.6490641236305237, + "learning_rate": 2.997729520947884e-05, + "loss": 1.3455, + "step": 90 + }, + { + "epoch": 0.21554169030062392, + "grad_norm": 0.6715092658996582, + "learning_rate": 2.9965806450267583e-05, + "loss": 1.2769, + "step": 95 + }, + { + "epoch": 0.22688598979013047, + "grad_norm": 0.7374163866043091, + "learning_rate": 2.9951976941558322e-05, + "loss": 1.2161, + "step": 100 + }, + { + "epoch": 0.238230289279637, + "grad_norm": 0.7029914855957031, + "learning_rate": 2.993580884637621e-05, + "loss": 1.3058, + "step": 105 + }, + { + "epoch": 0.2495745887691435, + "grad_norm": 0.6127893924713135, + "learning_rate": 2.9917304693516506e-05, + "loss": 1.1644, + "step": 110 + }, + { + "epoch": 0.26091888825865, + "grad_norm": 0.7816905975341797, + "learning_rate": 2.989646737714912e-05, + "loss": 1.2524, + "step": 115 + }, + { + "epoch": 0.27226318774815655, + "grad_norm": 0.7833459377288818, + "learning_rate": 2.9873300156365898e-05, + "loss": 1.1866, + "step": 120 + }, + { + "epoch": 0.28360748723766305, + "grad_norm": 0.7699435353279114, + "learning_rate": 2.9847806654670893e-05, + "loss": 1.1462, + "step": 125 + }, + { + "epoch": 0.2949517867271696, + "grad_norm": 0.7696182131767273, + "learning_rate": 2.9819990859413647e-05, + "loss": 1.2205, + "step": 130 + }, + { + "epoch": 0.30629608621667614, + "grad_norm": 0.8980322480201721, + "learning_rate": 2.9789857121165498e-05, + "loss": 1.0351, + "step": 135 + }, + { + "epoch": 0.31764038570618264, + "grad_norm": 0.9249258041381836, + "learning_rate": 2.9757410153039167e-05, + "loss": 1.1157, + "step": 140 + }, + { + "epoch": 0.3289846851956892, + "grad_norm": 0.8738279938697815, + "learning_rate": 2.9722655029951572e-05, + "loss": 1.1472, + "step": 145 + }, + { + "epoch": 0.3403289846851957, + "grad_norm": 1.0156298875808716, + "learning_rate": 2.9685597187830082e-05, + "loss": 1.1149, + "step": 150 + }, + { + "epoch": 0.3516732841747022, + "grad_norm": 0.9642776846885681, + "learning_rate": 2.9646242422762315e-05, + "loss": 1.089, + "step": 155 + }, + { + "epoch": 0.3630175836642087, + "grad_norm": 0.945302426815033, + "learning_rate": 2.9604596890089567e-05, + "loss": 1.034, + "step": 160 + }, + { + "epoch": 0.37436188315371527, + "grad_norm": 0.9079117774963379, + "learning_rate": 2.9560667103444098e-05, + "loss": 1.0172, + "step": 165 + }, + { + "epoch": 0.38570618264322176, + "grad_norm": 1.0160084962844849, + "learning_rate": 2.9514459933730355e-05, + "loss": 0.9977, + "step": 170 + }, + { + "epoch": 0.3970504821327283, + "grad_norm": 1.064819097518921, + "learning_rate": 2.946598260805031e-05, + "loss": 1.0245, + "step": 175 + }, + { + "epoch": 0.4083947816222348, + "grad_norm": 1.1079357862472534, + "learning_rate": 2.9415242708573094e-05, + "loss": 0.9514, + "step": 180 + }, + { + "epoch": 0.41973908111174135, + "grad_norm": 0.9676417112350464, + "learning_rate": 2.936224817134911e-05, + "loss": 0.97, + "step": 185 + }, + { + "epoch": 0.43108338060124785, + "grad_norm": 0.994784414768219, + "learning_rate": 2.930700728506876e-05, + "loss": 0.9792, + "step": 190 + }, + { + "epoch": 0.4424276800907544, + "grad_norm": 0.9528241753578186, + "learning_rate": 2.9249528689766073e-05, + "loss": 0.9654, + "step": 195 + }, + { + "epoch": 0.45377197958026094, + "grad_norm": 1.0785036087036133, + "learning_rate": 2.918982137546731e-05, + "loss": 0.9112, + "step": 200 + }, + { + "epoch": 0.46511627906976744, + "grad_norm": 1.0346730947494507, + "learning_rate": 2.912789468078489e-05, + "loss": 0.926, + "step": 205 + }, + { + "epoch": 0.476460578559274, + "grad_norm": 1.1981546878814697, + "learning_rate": 2.9063758291456756e-05, + "loss": 0.9104, + "step": 210 + }, + { + "epoch": 0.4878048780487805, + "grad_norm": 1.0059852600097656, + "learning_rate": 2.8997422238831495e-05, + "loss": 0.8647, + "step": 215 + }, + { + "epoch": 0.499149177538287, + "grad_norm": 0.9627645611763, + "learning_rate": 2.8928896898299307e-05, + "loss": 0.8898, + "step": 220 + }, + { + "epoch": 0.5104934770277936, + "grad_norm": 1.127786636352539, + "learning_rate": 2.8858192987669303e-05, + "loss": 0.8559, + "step": 225 + }, + { + "epoch": 0.5218377765173, + "grad_norm": 1.0037412643432617, + "learning_rate": 2.87853215654931e-05, + "loss": 0.8711, + "step": 230 + }, + { + "epoch": 0.5331820760068066, + "grad_norm": 1.1410571336746216, + "learning_rate": 2.8710294029335243e-05, + "loss": 0.8226, + "step": 235 + }, + { + "epoch": 0.5445263754963131, + "grad_norm": 1.3303135633468628, + "learning_rate": 2.8633122113990518e-05, + "loss": 0.8389, + "step": 240 + }, + { + "epoch": 0.5558706749858197, + "grad_norm": 1.0364309549331665, + "learning_rate": 2.8553817889648575e-05, + "loss": 0.7983, + "step": 245 + }, + { + "epoch": 0.5672149744753261, + "grad_norm": 1.1705923080444336, + "learning_rate": 2.847239376000607e-05, + "loss": 0.7706, + "step": 250 + }, + { + "epoch": 0.5785592739648326, + "grad_norm": 1.1077048778533936, + "learning_rate": 2.838886246032663e-05, + "loss": 0.8076, + "step": 255 + }, + { + "epoch": 0.5899035734543392, + "grad_norm": 1.1420488357543945, + "learning_rate": 2.8303237055448984e-05, + "loss": 0.8215, + "step": 260 + }, + { + "epoch": 0.6012478729438457, + "grad_norm": 1.2016924619674683, + "learning_rate": 2.821553093774355e-05, + "loss": 0.841, + "step": 265 + }, + { + "epoch": 0.6125921724333523, + "grad_norm": 1.2077339887619019, + "learning_rate": 2.8125757825017755e-05, + "loss": 0.7359, + "step": 270 + }, + { + "epoch": 0.6239364719228587, + "grad_norm": 1.1575523614883423, + "learning_rate": 2.803393175837051e-05, + "loss": 0.7357, + "step": 275 + }, + { + "epoch": 0.6352807714123653, + "grad_norm": 1.1340813636779785, + "learning_rate": 2.7940067099996068e-05, + "loss": 0.7247, + "step": 280 + }, + { + "epoch": 0.6466250709018718, + "grad_norm": 1.2878059148788452, + "learning_rate": 2.7844178530937693e-05, + "loss": 0.7371, + "step": 285 + }, + { + "epoch": 0.6579693703913784, + "grad_norm": 1.1062008142471313, + "learning_rate": 2.774628104879144e-05, + "loss": 0.7789, + "step": 290 + }, + { + "epoch": 0.6693136698808848, + "grad_norm": 1.174922227859497, + "learning_rate": 2.7646389965360455e-05, + "loss": 0.7428, + "step": 295 + }, + { + "epoch": 0.6806579693703914, + "grad_norm": 1.2082350254058838, + "learning_rate": 2.7544520904260087e-05, + "loss": 0.7093, + "step": 300 + }, + { + "epoch": 0.6920022688598979, + "grad_norm": 1.3182717561721802, + "learning_rate": 2.7440689798474262e-05, + "loss": 0.6939, + "step": 305 + }, + { + "epoch": 0.7033465683494045, + "grad_norm": 1.2624369859695435, + "learning_rate": 2.733491288786347e-05, + "loss": 0.6429, + "step": 310 + }, + { + "epoch": 0.7146908678389109, + "grad_norm": 1.1729363203048706, + "learning_rate": 2.7227206716624716e-05, + "loss": 0.7084, + "step": 315 + }, + { + "epoch": 0.7260351673284174, + "grad_norm": 1.1850183010101318, + "learning_rate": 2.7117588130703936e-05, + "loss": 0.6475, + "step": 320 + }, + { + "epoch": 0.737379466817924, + "grad_norm": 1.262630581855774, + "learning_rate": 2.7006074275161158e-05, + "loss": 0.7102, + "step": 325 + }, + { + "epoch": 0.7487237663074305, + "grad_norm": 1.2882691621780396, + "learning_rate": 2.689268259148891e-05, + "loss": 0.6743, + "step": 330 + }, + { + "epoch": 0.7600680657969371, + "grad_norm": 1.3231333494186401, + "learning_rate": 2.677743081488425e-05, + "loss": 0.6677, + "step": 335 + }, + { + "epoch": 0.7714123652864435, + "grad_norm": 1.4054661989212036, + "learning_rate": 2.6660336971474892e-05, + "loss": 0.6294, + "step": 340 + }, + { + "epoch": 0.7827566647759501, + "grad_norm": 1.2727746963500977, + "learning_rate": 2.654141937549976e-05, + "loss": 0.5898, + "step": 345 + }, + { + "epoch": 0.7941009642654566, + "grad_norm": 1.4216852188110352, + "learning_rate": 2.642069662644456e-05, + "loss": 0.5933, + "step": 350 + }, + { + "epoch": 0.8054452637549632, + "grad_norm": 1.2403401136398315, + "learning_rate": 2.629818760613268e-05, + "loss": 0.5827, + "step": 355 + }, + { + "epoch": 0.8167895632444696, + "grad_norm": 1.4916515350341797, + "learning_rate": 2.6173911475771956e-05, + "loss": 0.6424, + "step": 360 + }, + { + "epoch": 0.8281338627339762, + "grad_norm": 1.1449263095855713, + "learning_rate": 2.6047887672957725e-05, + "loss": 0.6214, + "step": 365 + }, + { + "epoch": 0.8394781622234827, + "grad_norm": 1.2165684700012207, + "learning_rate": 2.5920135908632678e-05, + "loss": 0.5725, + "step": 370 + }, + { + "epoch": 0.8508224617129893, + "grad_norm": 1.349132776260376, + "learning_rate": 2.5790676164003916e-05, + "loss": 0.5866, + "step": 375 + }, + { + "epoch": 0.8621667612024957, + "grad_norm": 1.3089734315872192, + "learning_rate": 2.5659528687417785e-05, + "loss": 0.5896, + "step": 380 + }, + { + "epoch": 0.8735110606920022, + "grad_norm": 1.29347562789917, + "learning_rate": 2.552671399119287e-05, + "loss": 0.5583, + "step": 385 + }, + { + "epoch": 0.8848553601815088, + "grad_norm": 1.3677277565002441, + "learning_rate": 2.5392252848411788e-05, + "loss": 0.6271, + "step": 390 + }, + { + "epoch": 0.8961996596710153, + "grad_norm": 1.1058645248413086, + "learning_rate": 2.525616628967207e-05, + "loss": 0.6141, + "step": 395 + }, + { + "epoch": 0.9075439591605219, + "grad_norm": 1.2163079977035522, + "learning_rate": 2.511847559979691e-05, + "loss": 0.5435, + "step": 400 + }, + { + "epoch": 0.9188882586500283, + "grad_norm": 1.1860313415527344, + "learning_rate": 2.4979202314506025e-05, + "loss": 0.5458, + "step": 405 + }, + { + "epoch": 0.9302325581395349, + "grad_norm": 1.4231187105178833, + "learning_rate": 2.483836821704737e-05, + "loss": 0.5504, + "step": 410 + }, + { + "epoch": 0.9415768576290414, + "grad_norm": 1.4057279825210571, + "learning_rate": 2.4695995334790066e-05, + "loss": 0.5581, + "step": 415 + }, + { + "epoch": 0.952921157118548, + "grad_norm": 1.3381789922714233, + "learning_rate": 2.455210593577919e-05, + "loss": 0.4921, + "step": 420 + }, + { + "epoch": 0.9642654566080544, + "grad_norm": 1.2590646743774414, + "learning_rate": 2.440672252525291e-05, + "loss": 0.5041, + "step": 425 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 1.209778070449829, + "learning_rate": 2.425986784212252e-05, + "loss": 0.5108, + "step": 430 + }, + { + "epoch": 0.9869540555870675, + "grad_norm": 1.2509938478469849, + "learning_rate": 2.4111564855415896e-05, + "loss": 0.4973, + "step": 435 + }, + { + "epoch": 0.998298355076574, + "grad_norm": 1.3053215742111206, + "learning_rate": 2.3961836760685047e-05, + "loss": 0.5142, + "step": 440 + }, + { + "epoch": 1.0090754395916053, + "grad_norm": 1.4633337259292603, + "learning_rate": 2.3810706976378136e-05, + "loss": 0.4432, + "step": 445 + }, + { + "epoch": 1.0204197390811118, + "grad_norm": 1.183571696281433, + "learning_rate": 2.3658199140176696e-05, + "loss": 0.4437, + "step": 450 + }, + { + "epoch": 1.0317640385706182, + "grad_norm": 1.1689449548721313, + "learning_rate": 2.3504337105298534e-05, + "loss": 0.4011, + "step": 455 + }, + { + "epoch": 1.0431083380601247, + "grad_norm": 1.432166576385498, + "learning_rate": 2.3349144936766957e-05, + "loss": 0.3474, + "step": 460 + }, + { + "epoch": 1.0544526375496313, + "grad_norm": 1.2825556993484497, + "learning_rate": 2.3192646907646816e-05, + "loss": 0.4107, + "step": 465 + }, + { + "epoch": 1.0657969370391378, + "grad_norm": 1.3690298795700073, + "learning_rate": 2.3034867495248064e-05, + "loss": 0.4207, + "step": 470 + }, + { + "epoch": 1.0771412365286444, + "grad_norm": 1.5328559875488281, + "learning_rate": 2.2875831377297318e-05, + "loss": 0.438, + "step": 475 + }, + { + "epoch": 1.088485536018151, + "grad_norm": 1.2528537511825562, + "learning_rate": 2.2715563428078148e-05, + "loss": 0.3812, + "step": 480 + }, + { + "epoch": 1.0998298355076574, + "grad_norm": 1.232212781906128, + "learning_rate": 2.2554088714540523e-05, + "loss": 0.3992, + "step": 485 + }, + { + "epoch": 1.111174134997164, + "grad_norm": 1.1943590641021729, + "learning_rate": 2.239143249238021e-05, + "loss": 0.4002, + "step": 490 + }, + { + "epoch": 1.1225184344866705, + "grad_norm": 1.459253191947937, + "learning_rate": 2.2227620202088622e-05, + "loss": 0.3377, + "step": 495 + }, + { + "epoch": 1.1338627339761769, + "grad_norm": 1.211742639541626, + "learning_rate": 2.2062677464973737e-05, + "loss": 0.378, + "step": 500 + }, + { + "epoch": 1.1452070334656834, + "grad_norm": 1.2123786211013794, + "learning_rate": 2.1896630079152774e-05, + "loss": 0.3717, + "step": 505 + }, + { + "epoch": 1.15655133295519, + "grad_norm": 1.5497947931289673, + "learning_rate": 2.1729504015517203e-05, + "loss": 0.3956, + "step": 510 + }, + { + "epoch": 1.1678956324446965, + "grad_norm": 1.3804091215133667, + "learning_rate": 2.1561325413670713e-05, + "loss": 0.364, + "step": 515 + }, + { + "epoch": 1.179239931934203, + "grad_norm": 1.2288784980773926, + "learning_rate": 2.139212057784082e-05, + "loss": 0.4332, + "step": 520 + }, + { + "epoch": 1.1905842314237096, + "grad_norm": 1.3379660844802856, + "learning_rate": 2.1221915972764717e-05, + "loss": 0.3535, + "step": 525 + }, + { + "epoch": 1.2019285309132162, + "grad_norm": 1.2405771017074585, + "learning_rate": 2.105073821955001e-05, + "loss": 0.342, + "step": 530 + }, + { + "epoch": 1.2132728304027227, + "grad_norm": 1.3663208484649658, + "learning_rate": 2.0878614091510995e-05, + "loss": 0.3907, + "step": 535 + }, + { + "epoch": 1.2246171298922293, + "grad_norm": 1.4096938371658325, + "learning_rate": 2.0705570509981158e-05, + "loss": 0.3687, + "step": 540 + }, + { + "epoch": 1.2359614293817356, + "grad_norm": 1.416892647743225, + "learning_rate": 2.0531634540102496e-05, + "loss": 0.3108, + "step": 545 + }, + { + "epoch": 1.2473057288712421, + "grad_norm": 1.2536832094192505, + "learning_rate": 2.035683338659234e-05, + "loss": 0.3625, + "step": 550 + }, + { + "epoch": 1.2586500283607487, + "grad_norm": 1.3575513362884521, + "learning_rate": 2.0181194389488375e-05, + "loss": 0.3571, + "step": 555 + }, + { + "epoch": 1.2699943278502552, + "grad_norm": 1.419228196144104, + "learning_rate": 2.0004745019872472e-05, + "loss": 0.2992, + "step": 560 + }, + { + "epoch": 1.2813386273397618, + "grad_norm": 1.2644859552383423, + "learning_rate": 1.982751287557405e-05, + "loss": 0.3539, + "step": 565 + }, + { + "epoch": 1.2926829268292683, + "grad_norm": 1.2232836484909058, + "learning_rate": 1.9649525676853553e-05, + "loss": 0.3955, + "step": 570 + }, + { + "epoch": 1.3040272263187749, + "grad_norm": 1.3128654956817627, + "learning_rate": 1.947081126206687e-05, + "loss": 0.3108, + "step": 575 + }, + { + "epoch": 1.3153715258082812, + "grad_norm": 1.4236958026885986, + "learning_rate": 1.929139758331122e-05, + "loss": 0.3626, + "step": 580 + }, + { + "epoch": 1.326715825297788, + "grad_norm": 1.2174386978149414, + "learning_rate": 1.9111312702053233e-05, + "loss": 0.3565, + "step": 585 + }, + { + "epoch": 1.3380601247872943, + "grad_norm": 1.3958114385604858, + "learning_rate": 1.893058478473999e-05, + "loss": 0.334, + "step": 590 + }, + { + "epoch": 1.3494044242768009, + "grad_norm": 1.2618368864059448, + "learning_rate": 1.87492420983936e-05, + "loss": 0.2762, + "step": 595 + }, + { + "epoch": 1.3607487237663074, + "grad_norm": 1.3615690469741821, + "learning_rate": 1.8567313006190042e-05, + "loss": 0.3348, + "step": 600 + }, + { + "epoch": 1.372093023255814, + "grad_norm": 1.21652352809906, + "learning_rate": 1.838482596302299e-05, + "loss": 0.3447, + "step": 605 + }, + { + "epoch": 1.3834373227453205, + "grad_norm": 1.9371083974838257, + "learning_rate": 1.8201809511053263e-05, + "loss": 0.3502, + "step": 610 + }, + { + "epoch": 1.394781622234827, + "grad_norm": 1.4073091745376587, + "learning_rate": 1.8018292275244693e-05, + "loss": 0.3128, + "step": 615 + }, + { + "epoch": 1.4061259217243336, + "grad_norm": 1.469925880432129, + "learning_rate": 1.7834302958886926e-05, + "loss": 0.3166, + "step": 620 + }, + { + "epoch": 1.41747022121384, + "grad_norm": 1.5013693571090698, + "learning_rate": 1.764987033910611e-05, + "loss": 0.3099, + "step": 625 + }, + { + "epoch": 1.4288145207033467, + "grad_norm": 1.4330624341964722, + "learning_rate": 1.7465023262363904e-05, + "loss": 0.3213, + "step": 630 + }, + { + "epoch": 1.440158820192853, + "grad_norm": 1.462883710861206, + "learning_rate": 1.727979063994576e-05, + "loss": 0.3244, + "step": 635 + }, + { + "epoch": 1.4515031196823596, + "grad_norm": 1.3327817916870117, + "learning_rate": 1.7094201443438955e-05, + "loss": 0.2568, + "step": 640 + }, + { + "epoch": 1.4628474191718661, + "grad_norm": 1.4149161577224731, + "learning_rate": 1.6908284700201295e-05, + "loss": 0.2811, + "step": 645 + }, + { + "epoch": 1.4741917186613727, + "grad_norm": 1.2790780067443848, + "learning_rate": 1.6722069488821012e-05, + "loss": 0.258, + "step": 650 + }, + { + "epoch": 1.4855360181508792, + "grad_norm": 1.2953827381134033, + "learning_rate": 1.6535584934568708e-05, + "loss": 0.2758, + "step": 655 + }, + { + "epoch": 1.4968803176403858, + "grad_norm": 1.348848581314087, + "learning_rate": 1.6348860204841948e-05, + "loss": 0.2488, + "step": 660 + }, + { + "epoch": 1.5082246171298923, + "grad_norm": 1.290279507637024, + "learning_rate": 1.6161924504603325e-05, + "loss": 0.2982, + "step": 665 + }, + { + "epoch": 1.5195689166193986, + "grad_norm": 1.2636975049972534, + "learning_rate": 1.597480707181257e-05, + "loss": 0.3073, + "step": 670 + }, + { + "epoch": 1.5309132161089054, + "grad_norm": 1.3808029890060425, + "learning_rate": 1.5787537172853582e-05, + "loss": 0.2782, + "step": 675 + }, + { + "epoch": 1.5422575155984117, + "grad_norm": 1.2945713996887207, + "learning_rate": 1.5600144097956955e-05, + "loss": 0.2659, + "step": 680 + }, + { + "epoch": 1.5536018150879183, + "grad_norm": 1.3140168190002441, + "learning_rate": 1.5412657156618802e-05, + "loss": 0.2941, + "step": 685 + }, + { + "epoch": 1.5649461145774248, + "grad_norm": 1.1707082986831665, + "learning_rate": 1.5225105673016569e-05, + "loss": 0.2436, + "step": 690 + }, + { + "epoch": 1.5762904140669314, + "grad_norm": 1.4172064065933228, + "learning_rate": 1.503751898142251e-05, + "loss": 0.2465, + "step": 695 + }, + { + "epoch": 1.587634713556438, + "grad_norm": 1.4871270656585693, + "learning_rate": 1.484992642161565e-05, + "loss": 0.2684, + "step": 700 + }, + { + "epoch": 1.5989790130459443, + "grad_norm": 1.2666114568710327, + "learning_rate": 1.466235733429285e-05, + "loss": 0.2468, + "step": 705 + }, + { + "epoch": 1.610323312535451, + "grad_norm": 1.2191888093948364, + "learning_rate": 1.44748410564797e-05, + "loss": 0.25, + "step": 710 + }, + { + "epoch": 1.6216676120249574, + "grad_norm": 1.4250890016555786, + "learning_rate": 1.4287406916942067e-05, + "loss": 0.2787, + "step": 715 + }, + { + "epoch": 1.6330119115144641, + "grad_norm": 1.1890467405319214, + "learning_rate": 1.410008423159883e-05, + "loss": 0.2147, + "step": 720 + }, + { + "epoch": 1.6443562110039704, + "grad_norm": 1.32356858253479, + "learning_rate": 1.3912902298936718e-05, + "loss": 0.2323, + "step": 725 + }, + { + "epoch": 1.655700510493477, + "grad_norm": 1.2933353185653687, + "learning_rate": 1.3725890395427832e-05, + "loss": 0.2575, + "step": 730 + }, + { + "epoch": 1.6670448099829835, + "grad_norm": 1.1312745809555054, + "learning_rate": 1.3539077770950602e-05, + "loss": 0.2482, + "step": 735 + }, + { + "epoch": 1.67838910947249, + "grad_norm": 1.55691397190094, + "learning_rate": 1.335249364421491e-05, + "loss": 0.2245, + "step": 740 + }, + { + "epoch": 1.6897334089619966, + "grad_norm": 1.2834844589233398, + "learning_rate": 1.316616719819212e-05, + "loss": 0.1926, + "step": 745 + }, + { + "epoch": 1.701077708451503, + "grad_norm": 1.277695894241333, + "learning_rate": 1.298012757555065e-05, + "loss": 0.1878, + "step": 750 + }, + { + "epoch": 1.7124220079410097, + "grad_norm": 1.3677343130111694, + "learning_rate": 1.279440387409788e-05, + "loss": 0.22, + "step": 755 + }, + { + "epoch": 1.723766307430516, + "grad_norm": 1.3113089799880981, + "learning_rate": 1.2609025142229049e-05, + "loss": 0.227, + "step": 760 + }, + { + "epoch": 1.7351106069200228, + "grad_norm": 1.164533019065857, + "learning_rate": 1.2424020374383914e-05, + "loss": 0.2014, + "step": 765 + }, + { + "epoch": 1.7464549064095292, + "grad_norm": 1.3280812501907349, + "learning_rate": 1.2239418506511836e-05, + "loss": 0.1982, + "step": 770 + }, + { + "epoch": 1.7577992058990357, + "grad_norm": 1.5715361833572388, + "learning_rate": 1.2055248411545986e-05, + "loss": 0.2524, + "step": 775 + }, + { + "epoch": 1.7691435053885423, + "grad_norm": 1.1785355806350708, + "learning_rate": 1.1871538894887443e-05, + "loss": 0.212, + "step": 780 + }, + { + "epoch": 1.7804878048780488, + "grad_norm": 1.3679323196411133, + "learning_rate": 1.1688318689899852e-05, + "loss": 0.1955, + "step": 785 + }, + { + "epoch": 1.7918321043675554, + "grad_norm": 1.2477246522903442, + "learning_rate": 1.150561645341532e-05, + "loss": 0.2026, + "step": 790 + }, + { + "epoch": 1.8031764038570617, + "grad_norm": 1.3043060302734375, + "learning_rate": 1.1323460761252323e-05, + "loss": 0.1826, + "step": 795 + }, + { + "epoch": 1.8145207033465685, + "grad_norm": 1.2714372873306274, + "learning_rate": 1.114188010374623e-05, + "loss": 0.1966, + "step": 800 + }, + { + "epoch": 1.8258650028360748, + "grad_norm": 1.246087670326233, + "learning_rate": 1.0960902881293259e-05, + "loss": 0.1868, + "step": 805 + }, + { + "epoch": 1.8372093023255816, + "grad_norm": 1.3521490097045898, + "learning_rate": 1.0780557399908465e-05, + "loss": 0.2278, + "step": 810 + }, + { + "epoch": 1.8485536018150879, + "grad_norm": 1.3960517644882202, + "learning_rate": 1.0600871866798486e-05, + "loss": 0.2177, + "step": 815 + }, + { + "epoch": 1.8598979013045944, + "grad_norm": 1.5459634065628052, + "learning_rate": 1.0421874385949744e-05, + "loss": 0.2075, + "step": 820 + }, + { + "epoch": 1.871242200794101, + "grad_norm": 1.168817400932312, + "learning_rate": 1.0243592953732828e-05, + "loss": 0.1778, + "step": 825 + }, + { + "epoch": 1.8825865002836075, + "grad_norm": 1.229718804359436, + "learning_rate": 1.0066055454523651e-05, + "loss": 0.196, + "step": 830 + }, + { + "epoch": 1.893930799773114, + "grad_norm": 1.3391629457473755, + "learning_rate": 9.889289656342179e-06, + "loss": 0.1683, + "step": 835 + }, + { + "epoch": 1.9052750992626204, + "grad_norm": 1.2143852710723877, + "learning_rate": 9.713323206509292e-06, + "loss": 0.1961, + "step": 840 + }, + { + "epoch": 1.9166193987521272, + "grad_norm": 1.2482773065567017, + "learning_rate": 9.538183627322604e-06, + "loss": 0.1851, + "step": 845 + }, + { + "epoch": 1.9279636982416335, + "grad_norm": 1.2379337549209595, + "learning_rate": 9.363898311751779e-06, + "loss": 0.1541, + "step": 850 + }, + { + "epoch": 1.9393079977311403, + "grad_norm": 1.191713809967041, + "learning_rate": 9.190494519154093e-06, + "loss": 0.1632, + "step": 855 + }, + { + "epoch": 1.9506522972206466, + "grad_norm": 1.291970133781433, + "learning_rate": 9.017999371010896e-06, + "loss": 0.1869, + "step": 860 + }, + { + "epoch": 1.9619965967101531, + "grad_norm": 1.3093546628952026, + "learning_rate": 8.846439846685619e-06, + "loss": 0.1607, + "step": 865 + }, + { + "epoch": 1.9733408961996597, + "grad_norm": 1.12217116355896, + "learning_rate": 8.67584277920406e-06, + "loss": 0.1845, + "step": 870 + }, + { + "epoch": 1.9846851956891662, + "grad_norm": 0.988784670829773, + "learning_rate": 8.506234851057494e-06, + "loss": 0.1681, + "step": 875 + }, + { + "epoch": 1.9960294951786728, + "grad_norm": 1.3125916719436646, + "learning_rate": 8.33764259002937e-06, + "loss": 0.1483, + "step": 880 + }, + { + "epoch": 2.006806579693704, + "grad_norm": 0.9683915972709656, + "learning_rate": 8.170092365046194e-06, + "loss": 0.1572, + "step": 885 + }, + { + "epoch": 2.0181508791832106, + "grad_norm": 1.1071760654449463, + "learning_rate": 8.003610382053252e-06, + "loss": 0.1308, + "step": 890 + }, + { + "epoch": 2.029495178672717, + "grad_norm": 1.298649549484253, + "learning_rate": 7.83822267991583e-06, + "loss": 0.118, + "step": 895 + }, + { + "epoch": 2.0408394781622237, + "grad_norm": 1.1196600198745728, + "learning_rate": 7.673955126346589e-06, + "loss": 0.161, + "step": 900 + }, + { + "epoch": 2.05218377765173, + "grad_norm": 1.375549077987671, + "learning_rate": 7.5108334138596335e-06, + "loss": 0.1396, + "step": 905 + }, + { + "epoch": 2.0635280771412363, + "grad_norm": 1.0669500827789307, + "learning_rate": 7.3488830557520815e-06, + "loss": 0.1549, + "step": 910 + }, + { + "epoch": 2.074872376630743, + "grad_norm": 1.086495041847229, + "learning_rate": 7.188129382113624e-06, + "loss": 0.1339, + "step": 915 + }, + { + "epoch": 2.0862166761202494, + "grad_norm": 0.9276008009910583, + "learning_rate": 7.028597535864695e-06, + "loss": 0.1358, + "step": 920 + }, + { + "epoch": 2.097560975609756, + "grad_norm": 0.9916872382164001, + "learning_rate": 6.870312468823965e-06, + "loss": 0.132, + "step": 925 + }, + { + "epoch": 2.1089052750992625, + "grad_norm": 1.1387484073638916, + "learning_rate": 6.713298937805755e-06, + "loss": 0.1272, + "step": 930 + }, + { + "epoch": 2.1202495745887693, + "grad_norm": 1.4319218397140503, + "learning_rate": 6.557581500747856e-06, + "loss": 0.1125, + "step": 935 + }, + { + "epoch": 2.1315938740782756, + "grad_norm": 1.3258739709854126, + "learning_rate": 6.403184512870544e-06, + "loss": 0.1348, + "step": 940 + }, + { + "epoch": 2.1429381735677824, + "grad_norm": 1.131720781326294, + "learning_rate": 6.25013212286724e-06, + "loss": 0.1235, + "step": 945 + }, + { + "epoch": 2.1542824730572887, + "grad_norm": 1.1326483488082886, + "learning_rate": 6.098448269127522e-06, + "loss": 0.1203, + "step": 950 + }, + { + "epoch": 2.165626772546795, + "grad_norm": 1.5706323385238647, + "learning_rate": 5.948156675992982e-06, + "loss": 0.1158, + "step": 955 + }, + { + "epoch": 2.176971072036302, + "grad_norm": 1.066754937171936, + "learning_rate": 5.799280850046603e-06, + "loss": 0.1191, + "step": 960 + }, + { + "epoch": 2.188315371525808, + "grad_norm": 0.9784988164901733, + "learning_rate": 5.651844076436165e-06, + "loss": 0.1228, + "step": 965 + }, + { + "epoch": 2.199659671015315, + "grad_norm": 1.0010255575180054, + "learning_rate": 5.505869415232299e-06, + "loss": 0.1331, + "step": 970 + }, + { + "epoch": 2.2110039705048212, + "grad_norm": 1.1350961923599243, + "learning_rate": 5.361379697821742e-06, + "loss": 0.1315, + "step": 975 + }, + { + "epoch": 2.222348269994328, + "grad_norm": 1.0257408618927002, + "learning_rate": 5.218397523336375e-06, + "loss": 0.1399, + "step": 980 + }, + { + "epoch": 2.2336925694838343, + "grad_norm": 1.0193580389022827, + "learning_rate": 5.07694525511853e-06, + "loss": 0.111, + "step": 985 + }, + { + "epoch": 2.245036868973341, + "grad_norm": 0.9272916316986084, + "learning_rate": 4.937045017223265e-06, + "loss": 0.1155, + "step": 990 + }, + { + "epoch": 2.2563811684628474, + "grad_norm": 0.9544349312782288, + "learning_rate": 4.798718690957999e-06, + "loss": 0.1349, + "step": 995 + }, + { + "epoch": 2.2677254679523537, + "grad_norm": 0.8948957324028015, + "learning_rate": 4.6619879114601255e-06, + "loss": 0.1015, + "step": 1000 + }, + { + "epoch": 2.2790697674418605, + "grad_norm": 0.9136641025543213, + "learning_rate": 4.526874064313131e-06, + "loss": 0.1115, + "step": 1005 + }, + { + "epoch": 2.290414066931367, + "grad_norm": 1.1017730236053467, + "learning_rate": 4.393398282201788e-06, + "loss": 0.1015, + "step": 1010 + }, + { + "epoch": 2.3017583664208736, + "grad_norm": 0.9441242218017578, + "learning_rate": 4.261581441606824e-06, + "loss": 0.125, + "step": 1015 + }, + { + "epoch": 2.31310266591038, + "grad_norm": 0.9149470329284668, + "learning_rate": 4.1314441595397156e-06, + "loss": 0.1116, + "step": 1020 + }, + { + "epoch": 2.3244469653998867, + "grad_norm": 1.1438705921173096, + "learning_rate": 4.00300679031808e-06, + "loss": 0.1063, + "step": 1025 + }, + { + "epoch": 2.335791264889393, + "grad_norm": 1.2796733379364014, + "learning_rate": 3.876289422382078e-06, + "loss": 0.1263, + "step": 1030 + }, + { + "epoch": 2.3471355643789, + "grad_norm": 1.1510380506515503, + "learning_rate": 3.7513118751524934e-06, + "loss": 0.1071, + "step": 1035 + }, + { + "epoch": 2.358479863868406, + "grad_norm": 0.9727523922920227, + "learning_rate": 3.628093695930836e-06, + "loss": 0.1337, + "step": 1040 + }, + { + "epoch": 2.3698241633579125, + "grad_norm": 2.70743727684021, + "learning_rate": 3.50665415684201e-06, + "loss": 0.1261, + "step": 1045 + }, + { + "epoch": 2.3811684628474192, + "grad_norm": 1.1585594415664673, + "learning_rate": 3.3870122518200134e-06, + "loss": 0.0989, + "step": 1050 + }, + { + "epoch": 2.3925127623369256, + "grad_norm": 1.05830717086792, + "learning_rate": 3.269186693637208e-06, + "loss": 0.1135, + "step": 1055 + }, + { + "epoch": 2.4038570618264323, + "grad_norm": 0.9199517369270325, + "learning_rate": 3.153195910977475e-06, + "loss": 0.102, + "step": 1060 + }, + { + "epoch": 2.4152013613159387, + "grad_norm": 1.0779012441635132, + "learning_rate": 3.039058045553872e-06, + "loss": 0.1197, + "step": 1065 + }, + { + "epoch": 2.4265456608054454, + "grad_norm": 1.0280113220214844, + "learning_rate": 2.9267909492711447e-06, + "loss": 0.1044, + "step": 1070 + }, + { + "epoch": 2.4378899602949518, + "grad_norm": 0.8546344041824341, + "learning_rate": 2.816412181433574e-06, + "loss": 0.1112, + "step": 1075 + }, + { + "epoch": 2.4492342597844585, + "grad_norm": 0.8314358592033386, + "learning_rate": 2.7079390059985835e-06, + "loss": 0.1189, + "step": 1080 + }, + { + "epoch": 2.460578559273965, + "grad_norm": 1.2652233839035034, + "learning_rate": 2.6013883888765533e-06, + "loss": 0.1258, + "step": 1085 + }, + { + "epoch": 2.471922858763471, + "grad_norm": 0.9996100664138794, + "learning_rate": 2.4967769952772284e-06, + "loss": 0.0929, + "step": 1090 + }, + { + "epoch": 2.483267158252978, + "grad_norm": 0.951572597026825, + "learning_rate": 2.394121187103184e-06, + "loss": 0.1036, + "step": 1095 + }, + { + "epoch": 2.4946114577424843, + "grad_norm": 0.7865042090415955, + "learning_rate": 2.293437020390701e-06, + "loss": 0.0922, + "step": 1100 + }, + { + "epoch": 2.505955757231991, + "grad_norm": 0.9021437764167786, + "learning_rate": 2.194740242798528e-06, + "loss": 0.1214, + "step": 1105 + }, + { + "epoch": 2.5173000567214974, + "grad_norm": 0.9650456309318542, + "learning_rate": 2.0980462911448028e-06, + "loss": 0.1046, + "step": 1110 + }, + { + "epoch": 2.5286443562110037, + "grad_norm": 0.787787914276123, + "learning_rate": 2.003370288992666e-06, + "loss": 0.0931, + "step": 1115 + }, + { + "epoch": 2.5399886557005105, + "grad_norm": 0.8400007486343384, + "learning_rate": 1.9107270442848305e-06, + "loss": 0.1148, + "step": 1120 + }, + { + "epoch": 2.5513329551900172, + "grad_norm": 0.8822923898696899, + "learning_rate": 1.8201310470275174e-06, + "loss": 0.0917, + "step": 1125 + }, + { + "epoch": 2.5626772546795236, + "grad_norm": 1.0914690494537354, + "learning_rate": 1.7315964670241164e-06, + "loss": 0.0904, + "step": 1130 + }, + { + "epoch": 2.57402155416903, + "grad_norm": 0.811316967010498, + "learning_rate": 1.6451371516589636e-06, + "loss": 0.1137, + "step": 1135 + }, + { + "epoch": 2.5853658536585367, + "grad_norm": 1.09565007686615, + "learning_rate": 1.5607666237314927e-06, + "loss": 0.1076, + "step": 1140 + }, + { + "epoch": 2.596710153148043, + "grad_norm": 1.0327187776565552, + "learning_rate": 1.4784980793411985e-06, + "loss": 0.0977, + "step": 1145 + }, + { + "epoch": 2.6080544526375498, + "grad_norm": 0.9040313363075256, + "learning_rate": 1.3983443858236677e-06, + "loss": 0.1214, + "step": 1150 + }, + { + "epoch": 2.619398752127056, + "grad_norm": 0.8656250834465027, + "learning_rate": 1.3203180797380583e-06, + "loss": 0.1209, + "step": 1155 + }, + { + "epoch": 2.6307430516165624, + "grad_norm": 0.6903753876686096, + "learning_rate": 1.2444313649062877e-06, + "loss": 0.1072, + "step": 1160 + }, + { + "epoch": 2.642087351106069, + "grad_norm": 0.9063003659248352, + "learning_rate": 1.1706961105042835e-06, + "loss": 0.1135, + "step": 1165 + }, + { + "epoch": 2.653431650595576, + "grad_norm": 0.8137299418449402, + "learning_rate": 1.099123849205565e-06, + "loss": 0.0867, + "step": 1170 + }, + { + "epoch": 2.6647759500850823, + "grad_norm": 0.8302875757217407, + "learning_rate": 1.029725775377452e-06, + "loss": 0.0827, + "step": 1175 + }, + { + "epoch": 2.6761202495745886, + "grad_norm": 0.8479173183441162, + "learning_rate": 9.625127433302082e-07, + "loss": 0.1003, + "step": 1180 + }, + { + "epoch": 2.6874645490640954, + "grad_norm": 0.7246958017349243, + "learning_rate": 8.974952656193403e-07, + "loss": 0.0855, + "step": 1185 + }, + { + "epoch": 2.6988088485536017, + "grad_norm": 0.7895841002464294, + "learning_rate": 8.346835114013713e-07, + "loss": 0.0984, + "step": 1190 + }, + { + "epoch": 2.7101531480431085, + "grad_norm": 0.8702403903007507, + "learning_rate": 7.740873048433212e-07, + "loss": 0.0939, + "step": 1195 + }, + { + "epoch": 2.721497447532615, + "grad_norm": 0.8283234238624573, + "learning_rate": 7.157161235861404e-07, + "loss": 0.1102, + "step": 1200 + }, + { + "epoch": 2.732841747022121, + "grad_norm": 0.9378776550292969, + "learning_rate": 6.595790972623505e-07, + "loss": 0.1187, + "step": 1205 + }, + { + "epoch": 2.744186046511628, + "grad_norm": 0.8255746960639954, + "learning_rate": 6.056850060680985e-07, + "loss": 0.0905, + "step": 1210 + }, + { + "epoch": 2.7555303460011347, + "grad_norm": 0.8047561049461365, + "learning_rate": 5.540422793898881e-07, + "loss": 0.0937, + "step": 1215 + }, + { + "epoch": 2.766874645490641, + "grad_norm": 0.726526141166687, + "learning_rate": 5.046589944861679e-07, + "loss": 0.1072, + "step": 1220 + }, + { + "epoch": 2.7782189449801473, + "grad_norm": 0.7481055855751038, + "learning_rate": 4.5754287522398575e-07, + "loss": 0.0825, + "step": 1225 + }, + { + "epoch": 2.789563244469654, + "grad_norm": 0.830743134021759, + "learning_rate": 4.127012908709427e-07, + "loss": 0.1105, + "step": 1230 + }, + { + "epoch": 2.8009075439591604, + "grad_norm": 0.8405081629753113, + "learning_rate": 3.70141254942572e-07, + "loss": 0.1071, + "step": 1235 + }, + { + "epoch": 2.812251843448667, + "grad_norm": 0.8794842958450317, + "learning_rate": 3.298694241053901e-07, + "loss": 0.0832, + "step": 1240 + }, + { + "epoch": 2.8235961429381735, + "grad_norm": 0.9151767492294312, + "learning_rate": 2.9189209713575914e-07, + "loss": 0.0958, + "step": 1245 + }, + { + "epoch": 2.83494044242768, + "grad_norm": 0.8882445096969604, + "learning_rate": 2.5621521393470017e-07, + "loss": 0.0861, + "step": 1250 + }, + { + "epoch": 2.8462847419171866, + "grad_norm": 0.7890960574150085, + "learning_rate": 2.2284435459885954e-07, + "loss": 0.097, + "step": 1255 + }, + { + "epoch": 2.8576290414066934, + "grad_norm": 0.7176119685173035, + "learning_rate": 1.9178473854775558e-07, + "loss": 0.0943, + "step": 1260 + }, + { + "epoch": 2.8689733408961997, + "grad_norm": 0.7787429094314575, + "learning_rate": 1.630412237074147e-07, + "loss": 0.0945, + "step": 1265 + }, + { + "epoch": 2.880317640385706, + "grad_norm": 0.7373933792114258, + "learning_rate": 1.3661830575056765e-07, + "loss": 0.0928, + "step": 1270 + }, + { + "epoch": 2.891661939875213, + "grad_norm": 0.7886492609977722, + "learning_rate": 1.1252011739349366e-07, + "loss": 0.0956, + "step": 1275 + }, + { + "epoch": 2.903006239364719, + "grad_norm": 0.8961607813835144, + "learning_rate": 9.075042774963405e-08, + "loss": 0.0971, + "step": 1280 + }, + { + "epoch": 2.914350538854226, + "grad_norm": 0.7748850584030151, + "learning_rate": 7.131264174008722e-08, + "loss": 0.0884, + "step": 1285 + }, + { + "epoch": 2.9256948383437322, + "grad_norm": 0.898790717124939, + "learning_rate": 5.4209799561049656e-08, + "loss": 0.0933, + "step": 1290 + }, + { + "epoch": 2.9370391378332386, + "grad_norm": 0.8125585913658142, + "learning_rate": 3.9444576208311214e-08, + "loss": 0.1051, + "step": 1295 + }, + { + "epoch": 2.9483834373227453, + "grad_norm": 0.7938846349716187, + "learning_rate": 2.701928105886653e-08, + "loss": 0.0971, + "step": 1300 + }, + { + "epoch": 2.959727736812252, + "grad_norm": 0.7347404956817627, + "learning_rate": 1.69358575097206e-08, + "loss": 0.0813, + "step": 1305 + }, + { + "epoch": 2.9710720363017584, + "grad_norm": 0.8305047154426575, + "learning_rate": 9.195882673916912e-09, + "loss": 0.0947, + "step": 1310 + }, + { + "epoch": 2.9824163357912647, + "grad_norm": 0.8242459893226624, + "learning_rate": 3.800567133879773e-09, + "loss": 0.1016, + "step": 1315 + }, + { + "epoch": 2.9937606352807715, + "grad_norm": 0.7967644929885864, + "learning_rate": 7.507547520591018e-10, + "loss": 0.1171, + "step": 1320 + }, + { + "epoch": 3.0, + "step": 1323, + "total_flos": 1.6618542086655836e+18, + "train_loss": 0.44569373376335236, + "train_runtime": 1314.7893, + "train_samples_per_second": 32.166, + "train_steps_per_second": 1.006 + } + ], + "logging_steps": 5, + "max_steps": 1323, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6618542086655836e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8faf53d7679079fcb6871480abb64db5d990f71e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/7_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ca2ac19b8c8aa1b8705d2f9857285ace5b9c49f6fced772a78488aba440779 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..261c1cde858a2a05283981a60ada4d3020225dd7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 8_128_e3_3e-5 + results: [] +--- + + + +# 8_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6724246b198e9efc8da00bdd5e5a1c2b94e9253 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "up_proj", + "o_proj", + "gate_proj", + "q_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..526589f7f4e87d6af9cbaec84bd1918fa9c4c21b --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f77873a8a87ec9c8c9019a831b76b16bf25e96a16ee6cdb8fab393085cbfbe2 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..004b88137c64c8033feae595683e9b8396f6935a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3742123318855598e+18, + "train_loss": 0.4448279767130995, + "train_runtime": 1090.317, + "train_samples": 11288, + "train_samples_per_second": 31.059, + "train_steps_per_second": 0.971 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..004b88137c64c8033feae595683e9b8396f6935a --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3742123318855598e+18, + "train_loss": 0.4448279767130995, + "train_runtime": 1090.317, + "train_samples": 11288, + "train_samples_per_second": 31.059, + "train_steps_per_second": 0.971 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e1c256d2b62a1470d43ee5a363a7c2ab034305a4 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1520 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1059, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014174344436569808, + "grad_norm": 0.7642120718955994, + "learning_rate": 2.2641509433962262e-06, + "loss": 1.5515, + "step": 5 + }, + { + "epoch": 0.028348688873139617, + "grad_norm": 0.5646042227745056, + "learning_rate": 5.094339622641509e-06, + "loss": 1.5728, + "step": 10 + }, + { + "epoch": 0.042523033309709427, + "grad_norm": 0.5350164771080017, + "learning_rate": 7.924528301886793e-06, + "loss": 1.5435, + "step": 15 + }, + { + "epoch": 0.05669737774627923, + "grad_norm": 0.47502878308296204, + "learning_rate": 1.0754716981132076e-05, + "loss": 1.5319, + "step": 20 + }, + { + "epoch": 0.07087172218284904, + "grad_norm": 0.6824010610580444, + "learning_rate": 1.358490566037736e-05, + "loss": 1.4541, + "step": 25 + }, + { + "epoch": 0.08504606661941885, + "grad_norm": 0.6240067481994629, + "learning_rate": 1.6415094339622643e-05, + "loss": 1.4956, + "step": 30 + }, + { + "epoch": 0.09922041105598867, + "grad_norm": 0.5455007553100586, + "learning_rate": 1.9245283018867924e-05, + "loss": 1.46, + "step": 35 + }, + { + "epoch": 0.11339475549255847, + "grad_norm": 0.684130847454071, + "learning_rate": 2.2075471698113208e-05, + "loss": 1.4845, + "step": 40 + }, + { + "epoch": 0.12756909992912827, + "grad_norm": 0.5220994353294373, + "learning_rate": 2.4905660377358492e-05, + "loss": 1.4412, + "step": 45 + }, + { + "epoch": 0.14174344436569808, + "grad_norm": 0.6968532204627991, + "learning_rate": 2.7735849056603773e-05, + "loss": 1.411, + "step": 50 + }, + { + "epoch": 0.1559177888022679, + "grad_norm": 0.49486619234085083, + "learning_rate": 2.999992685835993e-05, + "loss": 1.3645, + "step": 55 + }, + { + "epoch": 0.1700921332388377, + "grad_norm": 0.56968754529953, + "learning_rate": 2.9997366975852434e-05, + "loss": 1.3938, + "step": 60 + }, + { + "epoch": 0.18426647767540752, + "grad_norm": 0.8328511714935303, + "learning_rate": 2.999115072460336e-05, + "loss": 1.2991, + "step": 65 + }, + { + "epoch": 0.19844082211197733, + "grad_norm": 0.5932506918907166, + "learning_rate": 2.998127962013918e-05, + "loss": 1.3999, + "step": 70 + }, + { + "epoch": 0.21261516654854712, + "grad_norm": 0.5138024687767029, + "learning_rate": 2.9967756069042192e-05, + "loss": 1.2968, + "step": 75 + }, + { + "epoch": 0.22678951098511693, + "grad_norm": 0.6478395462036133, + "learning_rate": 2.9950583368363774e-05, + "loss": 1.2439, + "step": 80 + }, + { + "epoch": 0.24096385542168675, + "grad_norm": 0.7490803599357605, + "learning_rate": 2.9929765704820574e-05, + "loss": 1.2468, + "step": 85 + }, + { + "epoch": 0.25513819985825653, + "grad_norm": 0.7508903741836548, + "learning_rate": 2.9905308153773778e-05, + "loss": 1.2009, + "step": 90 + }, + { + "epoch": 0.2693125442948264, + "grad_norm": 0.7144332528114319, + "learning_rate": 2.9877216677991737e-05, + "loss": 1.2679, + "step": 95 + }, + { + "epoch": 0.28348688873139616, + "grad_norm": 0.6533330678939819, + "learning_rate": 2.984549812619624e-05, + "loss": 1.2172, + "step": 100 + }, + { + "epoch": 0.297661233167966, + "grad_norm": 0.6547049880027771, + "learning_rate": 2.981016023139278e-05, + "loss": 1.1827, + "step": 105 + }, + { + "epoch": 0.3118355776045358, + "grad_norm": 0.749098002910614, + "learning_rate": 2.9771211608985268e-05, + "loss": 1.1156, + "step": 110 + }, + { + "epoch": 0.3260099220411056, + "grad_norm": 0.7490646839141846, + "learning_rate": 2.9728661754675553e-05, + "loss": 1.1474, + "step": 115 + }, + { + "epoch": 0.3401842664776754, + "grad_norm": 0.8351019620895386, + "learning_rate": 2.968252104214841e-05, + "loss": 1.0791, + "step": 120 + }, + { + "epoch": 0.3543586109142452, + "grad_norm": 0.9544764161109924, + "learning_rate": 2.963280072054238e-05, + "loss": 1.0842, + "step": 125 + }, + { + "epoch": 0.36853295535081504, + "grad_norm": 0.7722133994102478, + "learning_rate": 2.9579512911707257e-05, + "loss": 0.984, + "step": 130 + }, + { + "epoch": 0.3827072997873848, + "grad_norm": 0.8375613689422607, + "learning_rate": 2.9522670607248758e-05, + "loss": 1.0713, + "step": 135 + }, + { + "epoch": 0.39688164422395467, + "grad_norm": 0.8959308862686157, + "learning_rate": 2.946228766536116e-05, + "loss": 1.0211, + "step": 140 + }, + { + "epoch": 0.41105598866052445, + "grad_norm": 0.8785033822059631, + "learning_rate": 2.939837880744866e-05, + "loss": 1.0139, + "step": 145 + }, + { + "epoch": 0.42523033309709424, + "grad_norm": 1.2079600095748901, + "learning_rate": 2.9330959614536314e-05, + "loss": 0.9576, + "step": 150 + }, + { + "epoch": 0.4394046775336641, + "grad_norm": 0.8604059219360352, + "learning_rate": 2.926004652347132e-05, + "loss": 0.9402, + "step": 155 + }, + { + "epoch": 0.45357902197023386, + "grad_norm": 1.0724509954452515, + "learning_rate": 2.9185656822915748e-05, + "loss": 0.9707, + "step": 160 + }, + { + "epoch": 0.4677533664068037, + "grad_norm": 0.8841168284416199, + "learning_rate": 2.910780864913153e-05, + "loss": 0.9485, + "step": 165 + }, + { + "epoch": 0.4819277108433735, + "grad_norm": 0.9044045805931091, + "learning_rate": 2.9026520981558844e-05, + "loss": 0.8888, + "step": 170 + }, + { + "epoch": 0.4961020552799433, + "grad_norm": 1.2042068243026733, + "learning_rate": 2.8941813638188887e-05, + "loss": 0.9056, + "step": 175 + }, + { + "epoch": 0.5102763997165131, + "grad_norm": 1.0029056072235107, + "learning_rate": 2.8853707270732256e-05, + "loss": 0.8918, + "step": 180 + }, + { + "epoch": 0.5244507441530829, + "grad_norm": 1.0466610193252563, + "learning_rate": 2.8762223359584033e-05, + "loss": 0.8433, + "step": 185 + }, + { + "epoch": 0.5386250885896527, + "grad_norm": 0.9973906874656677, + "learning_rate": 2.8667384208586863e-05, + "loss": 0.8381, + "step": 190 + }, + { + "epoch": 0.5527994330262226, + "grad_norm": 0.9874187707901001, + "learning_rate": 2.8569212939593252e-05, + "loss": 0.7764, + "step": 195 + }, + { + "epoch": 0.5669737774627923, + "grad_norm": 1.0236252546310425, + "learning_rate": 2.8467733486828448e-05, + "loss": 0.8216, + "step": 200 + }, + { + "epoch": 0.5811481218993622, + "grad_norm": 0.9415414929389954, + "learning_rate": 2.8362970591055248e-05, + "loss": 0.7554, + "step": 205 + }, + { + "epoch": 0.595322466335932, + "grad_norm": 1.0524959564208984, + "learning_rate": 2.8254949793542197e-05, + "loss": 0.7983, + "step": 210 + }, + { + "epoch": 0.6094968107725017, + "grad_norm": 1.0293519496917725, + "learning_rate": 2.81436974298366e-05, + "loss": 0.7674, + "step": 215 + }, + { + "epoch": 0.6236711552090716, + "grad_norm": 1.0918949842453003, + "learning_rate": 2.8029240623343908e-05, + "loss": 0.762, + "step": 220 + }, + { + "epoch": 0.6378454996456414, + "grad_norm": 1.1275453567504883, + "learning_rate": 2.791160727871499e-05, + "loss": 0.8004, + "step": 225 + }, + { + "epoch": 0.6520198440822113, + "grad_norm": 1.07200026512146, + "learning_rate": 2.779082607504298e-05, + "loss": 0.7612, + "step": 230 + }, + { + "epoch": 0.666194188518781, + "grad_norm": 0.9936215877532959, + "learning_rate": 2.7666926458871292e-05, + "loss": 0.6835, + "step": 235 + }, + { + "epoch": 0.6803685329553508, + "grad_norm": 1.1111797094345093, + "learning_rate": 2.7539938637014517e-05, + "loss": 0.6541, + "step": 240 + }, + { + "epoch": 0.6945428773919207, + "grad_norm": 1.1498838663101196, + "learning_rate": 2.7409893569193998e-05, + "loss": 0.655, + "step": 245 + }, + { + "epoch": 0.7087172218284904, + "grad_norm": 1.1505091190338135, + "learning_rate": 2.7276822960489815e-05, + "loss": 0.6986, + "step": 250 + }, + { + "epoch": 0.7228915662650602, + "grad_norm": 1.1721636056900024, + "learning_rate": 2.7140759253611067e-05, + "loss": 0.741, + "step": 255 + }, + { + "epoch": 0.7370659107016301, + "grad_norm": 1.0921951532363892, + "learning_rate": 2.7001735620986323e-05, + "loss": 0.6253, + "step": 260 + }, + { + "epoch": 0.7512402551381998, + "grad_norm": 1.2387495040893555, + "learning_rate": 2.6859785956676157e-05, + "loss": 0.6218, + "step": 265 + }, + { + "epoch": 0.7654145995747696, + "grad_norm": 1.1880497932434082, + "learning_rate": 2.6714944868109744e-05, + "loss": 0.6335, + "step": 270 + }, + { + "epoch": 0.7795889440113395, + "grad_norm": 1.2927953004837036, + "learning_rate": 2.6567247667647545e-05, + "loss": 0.6327, + "step": 275 + }, + { + "epoch": 0.7937632884479093, + "grad_norm": 1.1976948976516724, + "learning_rate": 2.641673036397215e-05, + "loss": 0.5687, + "step": 280 + }, + { + "epoch": 0.8079376328844791, + "grad_norm": 1.0663704872131348, + "learning_rate": 2.626342965330931e-05, + "loss": 0.6058, + "step": 285 + }, + { + "epoch": 0.8221119773210489, + "grad_norm": 1.2573120594024658, + "learning_rate": 2.6107382910481377e-05, + "loss": 0.5934, + "step": 290 + }, + { + "epoch": 0.8362863217576187, + "grad_norm": 1.3996970653533936, + "learning_rate": 2.5948628179795307e-05, + "loss": 0.5775, + "step": 295 + }, + { + "epoch": 0.8504606661941885, + "grad_norm": 1.1950550079345703, + "learning_rate": 2.5787204165767414e-05, + "loss": 0.5524, + "step": 300 + }, + { + "epoch": 0.8646350106307583, + "grad_norm": 1.3301461935043335, + "learning_rate": 2.56231502236872e-05, + "loss": 0.6135, + "step": 305 + }, + { + "epoch": 0.8788093550673282, + "grad_norm": 1.1953920125961304, + "learning_rate": 2.5456506350022493e-05, + "loss": 0.4735, + "step": 310 + }, + { + "epoch": 0.892983699503898, + "grad_norm": 1.2606408596038818, + "learning_rate": 2.5287313172668283e-05, + "loss": 0.5669, + "step": 315 + }, + { + "epoch": 0.9071580439404677, + "grad_norm": 1.308218002319336, + "learning_rate": 2.511561194104161e-05, + "loss": 0.5662, + "step": 320 + }, + { + "epoch": 0.9213323883770376, + "grad_norm": 1.294284701347351, + "learning_rate": 2.494144451602495e-05, + "loss": 0.5014, + "step": 325 + }, + { + "epoch": 0.9355067328136074, + "grad_norm": 1.18809175491333, + "learning_rate": 2.4764853359760448e-05, + "loss": 0.5559, + "step": 330 + }, + { + "epoch": 0.9496810772501771, + "grad_norm": 1.2089568376541138, + "learning_rate": 2.458588152529769e-05, + "loss": 0.5318, + "step": 335 + }, + { + "epoch": 0.963855421686747, + "grad_norm": 1.1450402736663818, + "learning_rate": 2.440457264609727e-05, + "loss": 0.5046, + "step": 340 + }, + { + "epoch": 0.9780297661233168, + "grad_norm": 1.2504876852035522, + "learning_rate": 2.4220970925392984e-05, + "loss": 0.514, + "step": 345 + }, + { + "epoch": 0.9922041105598866, + "grad_norm": 1.3590415716171265, + "learning_rate": 2.403512112541498e-05, + "loss": 0.53, + "step": 350 + }, + { + "epoch": 1.005669737774628, + "grad_norm": 1.2693856954574585, + "learning_rate": 2.384706855647676e-05, + "loss": 0.499, + "step": 355 + }, + { + "epoch": 1.0198440822111978, + "grad_norm": 1.3228222131729126, + "learning_rate": 2.365685906592846e-05, + "loss": 0.3963, + "step": 360 + }, + { + "epoch": 1.0340184266477674, + "grad_norm": 1.2264729738235474, + "learning_rate": 2.3464539026979235e-05, + "loss": 0.4499, + "step": 365 + }, + { + "epoch": 1.0481927710843373, + "grad_norm": 1.2709811925888062, + "learning_rate": 2.327015532739145e-05, + "loss": 0.399, + "step": 370 + }, + { + "epoch": 1.0623671155209071, + "grad_norm": 1.5685312747955322, + "learning_rate": 2.3073755358049395e-05, + "loss": 0.4025, + "step": 375 + }, + { + "epoch": 1.076541459957477, + "grad_norm": 1.2199190855026245, + "learning_rate": 2.2875387001405366e-05, + "loss": 0.4485, + "step": 380 + }, + { + "epoch": 1.0907158043940468, + "grad_norm": 1.2164628505706787, + "learning_rate": 2.2675098619805877e-05, + "loss": 0.3953, + "step": 385 + }, + { + "epoch": 1.1048901488306166, + "grad_norm": 1.3125429153442383, + "learning_rate": 2.2472939043700896e-05, + "loss": 0.3675, + "step": 390 + }, + { + "epoch": 1.1190644932671865, + "grad_norm": 1.2648354768753052, + "learning_rate": 2.2268957559738947e-05, + "loss": 0.3771, + "step": 395 + }, + { + "epoch": 1.133238837703756, + "grad_norm": 1.2221639156341553, + "learning_rate": 2.2063203898750987e-05, + "loss": 0.4183, + "step": 400 + }, + { + "epoch": 1.147413182140326, + "grad_norm": 1.2137963771820068, + "learning_rate": 2.1855728223625986e-05, + "loss": 0.3724, + "step": 405 + }, + { + "epoch": 1.1615875265768958, + "grad_norm": 1.2517437934875488, + "learning_rate": 2.1646581117081185e-05, + "loss": 0.3357, + "step": 410 + }, + { + "epoch": 1.1757618710134656, + "grad_norm": 1.4244778156280518, + "learning_rate": 2.1435813569330012e-05, + "loss": 0.3838, + "step": 415 + }, + { + "epoch": 1.1899362154500355, + "grad_norm": 1.2451945543289185, + "learning_rate": 2.1223476965650586e-05, + "loss": 0.3676, + "step": 420 + }, + { + "epoch": 1.2041105598866053, + "grad_norm": 1.5002508163452148, + "learning_rate": 2.1009623073858003e-05, + "loss": 0.3287, + "step": 425 + }, + { + "epoch": 1.2182849043231752, + "grad_norm": 1.3113237619400024, + "learning_rate": 2.0794304031683267e-05, + "loss": 0.3435, + "step": 430 + }, + { + "epoch": 1.2324592487597448, + "grad_norm": 1.2472240924835205, + "learning_rate": 2.0577572334062094e-05, + "loss": 0.298, + "step": 435 + }, + { + "epoch": 1.2466335931963146, + "grad_norm": 1.0846598148345947, + "learning_rate": 2.0359480820336596e-05, + "loss": 0.3627, + "step": 440 + }, + { + "epoch": 1.2608079376328845, + "grad_norm": 1.3894292116165161, + "learning_rate": 2.0140082661373034e-05, + "loss": 0.3318, + "step": 445 + }, + { + "epoch": 1.2749822820694543, + "grad_norm": 1.274277687072754, + "learning_rate": 1.9919431346598688e-05, + "loss": 0.3553, + "step": 450 + }, + { + "epoch": 1.2891566265060241, + "grad_norm": 1.418903112411499, + "learning_rate": 1.969758067096113e-05, + "loss": 0.3527, + "step": 455 + }, + { + "epoch": 1.303330970942594, + "grad_norm": 1.2959779500961304, + "learning_rate": 1.947458472181296e-05, + "loss": 0.3592, + "step": 460 + }, + { + "epoch": 1.3175053153791638, + "grad_norm": 1.2910457849502563, + "learning_rate": 1.925049786572528e-05, + "loss": 0.3455, + "step": 465 + }, + { + "epoch": 1.3316796598157334, + "grad_norm": 1.2782082557678223, + "learning_rate": 1.9025374735233067e-05, + "loss": 0.3124, + "step": 470 + }, + { + "epoch": 1.3458540042523033, + "grad_norm": 1.2281428575515747, + "learning_rate": 1.8799270215515756e-05, + "loss": 0.3531, + "step": 475 + }, + { + "epoch": 1.3600283486888731, + "grad_norm": 1.1713485717773438, + "learning_rate": 1.8572239431016146e-05, + "loss": 0.3217, + "step": 480 + }, + { + "epoch": 1.374202693125443, + "grad_norm": 1.3013916015625, + "learning_rate": 1.8344337732001073e-05, + "loss": 0.2773, + "step": 485 + }, + { + "epoch": 1.3883770375620128, + "grad_norm": 1.252773642539978, + "learning_rate": 1.8115620681066946e-05, + "loss": 0.289, + "step": 490 + }, + { + "epoch": 1.4025513819985826, + "grad_norm": 1.2815337181091309, + "learning_rate": 1.7886144039593537e-05, + "loss": 0.2987, + "step": 495 + }, + { + "epoch": 1.4167257264351525, + "grad_norm": 1.1866180896759033, + "learning_rate": 1.765596375414936e-05, + "loss": 0.2802, + "step": 500 + }, + { + "epoch": 1.430900070871722, + "grad_norm": 1.5300204753875732, + "learning_rate": 1.74251359428518e-05, + "loss": 0.2936, + "step": 505 + }, + { + "epoch": 1.445074415308292, + "grad_norm": 1.3379368782043457, + "learning_rate": 1.7193716881685534e-05, + "loss": 0.3093, + "step": 510 + }, + { + "epoch": 1.4592487597448618, + "grad_norm": 1.3192110061645508, + "learning_rate": 1.6961762990782346e-05, + "loss": 0.3065, + "step": 515 + }, + { + "epoch": 1.4734231041814316, + "grad_norm": 1.22186279296875, + "learning_rate": 1.6729330820665924e-05, + "loss": 0.2821, + "step": 520 + }, + { + "epoch": 1.4875974486180015, + "grad_norm": 1.2729092836380005, + "learning_rate": 1.6496477038464743e-05, + "loss": 0.2794, + "step": 525 + }, + { + "epoch": 1.501771793054571, + "grad_norm": 1.0796782970428467, + "learning_rate": 1.626325841409662e-05, + "loss": 0.3017, + "step": 530 + }, + { + "epoch": 1.5159461374911412, + "grad_norm": 1.204921007156372, + "learning_rate": 1.602973180642814e-05, + "loss": 0.3026, + "step": 535 + }, + { + "epoch": 1.5301204819277108, + "grad_norm": 1.354894995689392, + "learning_rate": 1.5795954149412445e-05, + "loss": 0.2572, + "step": 540 + }, + { + "epoch": 1.5442948263642806, + "grad_norm": 1.312331199645996, + "learning_rate": 1.5561982438208685e-05, + "loss": 0.2678, + "step": 545 + }, + { + "epoch": 1.5584691708008505, + "grad_norm": 1.2705223560333252, + "learning_rate": 1.5327873715286553e-05, + "loss": 0.2669, + "step": 550 + }, + { + "epoch": 1.5726435152374203, + "grad_norm": 1.473730206489563, + "learning_rate": 1.5093685056519305e-05, + "loss": 0.2243, + "step": 555 + }, + { + "epoch": 1.5868178596739901, + "grad_norm": 1.3220335245132446, + "learning_rate": 1.4859473557268607e-05, + "loss": 0.2445, + "step": 560 + }, + { + "epoch": 1.6009922041105598, + "grad_norm": 1.3100484609603882, + "learning_rate": 1.4625296318464652e-05, + "loss": 0.2151, + "step": 565 + }, + { + "epoch": 1.6151665485471298, + "grad_norm": 1.2081139087677002, + "learning_rate": 1.4391210432684912e-05, + "loss": 0.2929, + "step": 570 + }, + { + "epoch": 1.6293408929836994, + "grad_norm": 1.335688591003418, + "learning_rate": 1.4157272970234925e-05, + "loss": 0.2323, + "step": 575 + }, + { + "epoch": 1.6435152374202693, + "grad_norm": 1.2439665794372559, + "learning_rate": 1.3923540965234527e-05, + "loss": 0.2253, + "step": 580 + }, + { + "epoch": 1.6576895818568391, + "grad_norm": 1.1251533031463623, + "learning_rate": 1.3690071401712863e-05, + "loss": 0.2215, + "step": 585 + }, + { + "epoch": 1.671863926293409, + "grad_norm": 1.1435329914093018, + "learning_rate": 1.345692119971567e-05, + "loss": 0.2355, + "step": 590 + }, + { + "epoch": 1.6860382707299788, + "grad_norm": 1.3442436456680298, + "learning_rate": 1.322414720142812e-05, + "loss": 0.2395, + "step": 595 + }, + { + "epoch": 1.7002126151665484, + "grad_norm": 1.1222180128097534, + "learning_rate": 1.2991806157316647e-05, + "loss": 0.2089, + "step": 600 + }, + { + "epoch": 1.7143869596031185, + "grad_norm": 1.2131556272506714, + "learning_rate": 1.2759954712293147e-05, + "loss": 0.1917, + "step": 605 + }, + { + "epoch": 1.728561304039688, + "grad_norm": 1.051391363143921, + "learning_rate": 1.2528649391904928e-05, + "loss": 0.2142, + "step": 610 + }, + { + "epoch": 1.742735648476258, + "grad_norm": 1.062532901763916, + "learning_rate": 1.2297946588553688e-05, + "loss": 0.1961, + "step": 615 + }, + { + "epoch": 1.7569099929128278, + "grad_norm": 1.2392939329147339, + "learning_rate": 1.2067902547747076e-05, + "loss": 0.2105, + "step": 620 + }, + { + "epoch": 1.7710843373493976, + "grad_norm": 1.2457234859466553, + "learning_rate": 1.1838573354385947e-05, + "loss": 0.2456, + "step": 625 + }, + { + "epoch": 1.7852586817859675, + "grad_norm": 1.2110346555709839, + "learning_rate": 1.1610014919090847e-05, + "loss": 0.212, + "step": 630 + }, + { + "epoch": 1.799433026222537, + "grad_norm": 1.3423017263412476, + "learning_rate": 1.1382282964570956e-05, + "loss": 0.2029, + "step": 635 + }, + { + "epoch": 1.8136073706591072, + "grad_norm": 1.2750815153121948, + "learning_rate": 1.1155433012038847e-05, + "loss": 0.2135, + "step": 640 + }, + { + "epoch": 1.8277817150956768, + "grad_norm": 1.0288220643997192, + "learning_rate": 1.0929520367674389e-05, + "loss": 0.2147, + "step": 645 + }, + { + "epoch": 1.8419560595322466, + "grad_norm": 1.2046732902526855, + "learning_rate": 1.0704600109141043e-05, + "loss": 0.1829, + "step": 650 + }, + { + "epoch": 1.8561304039688165, + "grad_norm": 1.187681794166565, + "learning_rate": 1.0480727072157912e-05, + "loss": 0.2144, + "step": 655 + }, + { + "epoch": 1.8703047484053863, + "grad_norm": 1.3329010009765625, + "learning_rate": 1.0257955837130725e-05, + "loss": 0.1992, + "step": 660 + }, + { + "epoch": 1.8844790928419561, + "grad_norm": 1.0878031253814697, + "learning_rate": 1.0036340715845118e-05, + "loss": 0.2181, + "step": 665 + }, + { + "epoch": 1.8986534372785258, + "grad_norm": 1.1710363626480103, + "learning_rate": 9.815935738225376e-06, + "loss": 0.2021, + "step": 670 + }, + { + "epoch": 1.9128277817150958, + "grad_norm": 1.4507313966751099, + "learning_rate": 9.596794639161892e-06, + "loss": 0.1945, + "step": 675 + }, + { + "epoch": 1.9270021261516654, + "grad_norm": 1.1932235956192017, + "learning_rate": 9.37897084541057e-06, + "loss": 0.1774, + "step": 680 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.2013750076293945, + "learning_rate": 9.16251746256734e-06, + "loss": 0.1654, + "step": 685 + }, + { + "epoch": 1.9553508150248051, + "grad_norm": 1.0388667583465576, + "learning_rate": 8.94748726212097e-06, + "loss": 0.1732, + "step": 690 + }, + { + "epoch": 1.969525159461375, + "grad_norm": 1.274439811706543, + "learning_rate": 8.733932668587371e-06, + "loss": 0.1671, + "step": 695 + }, + { + "epoch": 1.9836995038979448, + "grad_norm": 1.1774682998657227, + "learning_rate": 8.521905746728408e-06, + "loss": 0.1567, + "step": 700 + }, + { + "epoch": 1.9978738483345144, + "grad_norm": 1.2181771993637085, + "learning_rate": 8.311458188858525e-06, + "loss": 0.1565, + "step": 705 + }, + { + "epoch": 2.011339475549256, + "grad_norm": 0.9521868824958801, + "learning_rate": 8.102641302242104e-06, + "loss": 0.1706, + "step": 710 + }, + { + "epoch": 2.0255138199858256, + "grad_norm": 1.1977851390838623, + "learning_rate": 7.89550599658469e-06, + "loss": 0.1464, + "step": 715 + }, + { + "epoch": 2.0396881644223956, + "grad_norm": 0.8665183782577515, + "learning_rate": 7.69010277162122e-06, + "loss": 0.1289, + "step": 720 + }, + { + "epoch": 2.0538625088589653, + "grad_norm": 0.9645025134086609, + "learning_rate": 7.486481704804117e-06, + "loss": 0.1204, + "step": 725 + }, + { + "epoch": 2.068036853295535, + "grad_norm": 1.0500234365463257, + "learning_rate": 7.284692439094369e-06, + "loss": 0.1243, + "step": 730 + }, + { + "epoch": 2.082211197732105, + "grad_norm": 1.0689449310302734, + "learning_rate": 7.084784170858566e-06, + "loss": 0.1384, + "step": 735 + }, + { + "epoch": 2.0963855421686746, + "grad_norm": 1.0275673866271973, + "learning_rate": 6.8868056378747715e-06, + "loss": 0.1328, + "step": 740 + }, + { + "epoch": 2.1105598866052446, + "grad_norm": 1.0826483964920044, + "learning_rate": 6.690805107450209e-06, + "loss": 0.1349, + "step": 745 + }, + { + "epoch": 2.1247342310418142, + "grad_norm": 1.350569725036621, + "learning_rate": 6.496830364653691e-06, + "loss": 0.1446, + "step": 750 + }, + { + "epoch": 2.1389085754783843, + "grad_norm": 1.0912501811981201, + "learning_rate": 6.304928700665545e-06, + "loss": 0.143, + "step": 755 + }, + { + "epoch": 2.153082919914954, + "grad_norm": 1.0563998222351074, + "learning_rate": 6.115146901248015e-06, + "loss": 0.1192, + "step": 760 + }, + { + "epoch": 2.1672572643515235, + "grad_norm": 0.8902752995491028, + "learning_rate": 5.9275312353388635e-06, + "loss": 0.1251, + "step": 765 + }, + { + "epoch": 2.1814316087880936, + "grad_norm": 0.9292100667953491, + "learning_rate": 5.7421274437709586e-06, + "loss": 0.1259, + "step": 770 + }, + { + "epoch": 2.1956059532246632, + "grad_norm": 0.9446889162063599, + "learning_rate": 5.558980728120618e-06, + "loss": 0.1028, + "step": 775 + }, + { + "epoch": 2.2097802976612333, + "grad_norm": 1.147320032119751, + "learning_rate": 5.378135739687457e-06, + "loss": 0.1467, + "step": 780 + }, + { + "epoch": 2.223954642097803, + "grad_norm": 0.8568204045295715, + "learning_rate": 5.199636568608363e-06, + "loss": 0.1205, + "step": 785 + }, + { + "epoch": 2.238128986534373, + "grad_norm": 1.1502645015716553, + "learning_rate": 5.023526733108258e-06, + "loss": 0.1177, + "step": 790 + }, + { + "epoch": 2.2523033309709426, + "grad_norm": 1.0181660652160645, + "learning_rate": 4.849849168890375e-06, + "loss": 0.1104, + "step": 795 + }, + { + "epoch": 2.266477675407512, + "grad_norm": 0.9378981590270996, + "learning_rate": 4.678646218668473e-06, + "loss": 0.1003, + "step": 800 + }, + { + "epoch": 2.2806520198440823, + "grad_norm": 1.043668508529663, + "learning_rate": 4.509959621843638e-06, + "loss": 0.1143, + "step": 805 + }, + { + "epoch": 2.294826364280652, + "grad_norm": 0.7924176454544067, + "learning_rate": 4.3438305043282315e-06, + "loss": 0.1002, + "step": 810 + }, + { + "epoch": 2.309000708717222, + "grad_norm": 1.0647655725479126, + "learning_rate": 4.180299368519332e-06, + "loss": 0.1154, + "step": 815 + }, + { + "epoch": 2.3231750531537916, + "grad_norm": 1.177703619003296, + "learning_rate": 4.019406083424222e-06, + "loss": 0.1331, + "step": 820 + }, + { + "epoch": 2.337349397590361, + "grad_norm": 1.136826753616333, + "learning_rate": 3.861189874940302e-06, + "loss": 0.1138, + "step": 825 + }, + { + "epoch": 2.3515237420269313, + "grad_norm": 0.8386954665184021, + "learning_rate": 3.7056893162918064e-06, + "loss": 0.1106, + "step": 830 + }, + { + "epoch": 2.365698086463501, + "grad_norm": 1.1427544355392456, + "learning_rate": 3.5529423186255833e-06, + "loss": 0.1411, + "step": 835 + }, + { + "epoch": 2.379872430900071, + "grad_norm": 0.9464730024337769, + "learning_rate": 3.4029861217683743e-06, + "loss": 0.1167, + "step": 840 + }, + { + "epoch": 2.3940467753366406, + "grad_norm": 0.9984065294265747, + "learning_rate": 3.2558572851476903e-06, + "loss": 0.1195, + "step": 845 + }, + { + "epoch": 2.4082211197732106, + "grad_norm": 0.7185961604118347, + "learning_rate": 3.111591678878596e-06, + "loss": 0.1204, + "step": 850 + }, + { + "epoch": 2.4223954642097802, + "grad_norm": 1.0266855955123901, + "learning_rate": 2.9702244750185724e-06, + "loss": 0.118, + "step": 855 + }, + { + "epoch": 2.4365698086463503, + "grad_norm": 0.9509408473968506, + "learning_rate": 2.831790138992526e-06, + "loss": 0.1051, + "step": 860 + }, + { + "epoch": 2.45074415308292, + "grad_norm": 0.7837101221084595, + "learning_rate": 2.696322421190091e-06, + "loss": 0.0889, + "step": 865 + }, + { + "epoch": 2.4649184975194895, + "grad_norm": 0.7448822259902954, + "learning_rate": 2.563854348737275e-06, + "loss": 0.1131, + "step": 870 + }, + { + "epoch": 2.4790928419560596, + "grad_norm": 1.1718308925628662, + "learning_rate": 2.434418217444419e-06, + "loss": 0.1179, + "step": 875 + }, + { + "epoch": 2.4932671863926292, + "grad_norm": 1.0667239427566528, + "learning_rate": 2.3080455839324342e-06, + "loss": 0.123, + "step": 880 + }, + { + "epoch": 2.5074415308291993, + "grad_norm": 0.7885234355926514, + "learning_rate": 2.184767257939312e-06, + "loss": 0.1018, + "step": 885 + }, + { + "epoch": 2.521615875265769, + "grad_norm": 0.768433690071106, + "learning_rate": 2.064613294808664e-06, + "loss": 0.1042, + "step": 890 + }, + { + "epoch": 2.5357902197023385, + "grad_norm": 0.8056691884994507, + "learning_rate": 1.947612988162197e-06, + "loss": 0.1081, + "step": 895 + }, + { + "epoch": 2.5499645641389086, + "grad_norm": 0.9662020206451416, + "learning_rate": 1.8337948627579398e-06, + "loss": 0.1127, + "step": 900 + }, + { + "epoch": 2.5641389085754787, + "grad_norm": 0.8402814865112305, + "learning_rate": 1.7231866675358704e-06, + "loss": 0.1036, + "step": 905 + }, + { + "epoch": 2.5783132530120483, + "grad_norm": 1.0836635828018188, + "learning_rate": 1.6158153688526893e-06, + "loss": 0.0982, + "step": 910 + }, + { + "epoch": 2.592487597448618, + "grad_norm": 0.749840259552002, + "learning_rate": 1.5117071439074305e-06, + "loss": 0.0959, + "step": 915 + }, + { + "epoch": 2.606661941885188, + "grad_norm": 0.8625677824020386, + "learning_rate": 1.4108873743594275e-06, + "loss": 0.1216, + "step": 920 + }, + { + "epoch": 2.6208362863217576, + "grad_norm": 0.8191054463386536, + "learning_rate": 1.3133806401402376e-06, + "loss": 0.1026, + "step": 925 + }, + { + "epoch": 2.6350106307583276, + "grad_norm": 0.740037202835083, + "learning_rate": 1.2192107134610586e-06, + "loss": 0.1025, + "step": 930 + }, + { + "epoch": 2.6491849751948973, + "grad_norm": 0.7338634729385376, + "learning_rate": 1.1284005530170305e-06, + "loss": 0.0938, + "step": 935 + }, + { + "epoch": 2.663359319631467, + "grad_norm": 0.9249878525733948, + "learning_rate": 1.0409722983898928e-06, + "loss": 0.1305, + "step": 940 + }, + { + "epoch": 2.677533664068037, + "grad_norm": 0.8364435434341431, + "learning_rate": 9.569472646503424e-07, + "loss": 0.0986, + "step": 945 + }, + { + "epoch": 2.6917080085046066, + "grad_norm": 0.654893696308136, + "learning_rate": 8.763459371614036e-07, + "loss": 0.0871, + "step": 950 + }, + { + "epoch": 2.7058823529411766, + "grad_norm": 0.837059497833252, + "learning_rate": 7.991879665840745e-07, + "loss": 0.1078, + "step": 955 + }, + { + "epoch": 2.7200566973777462, + "grad_norm": 0.7213865518569946, + "learning_rate": 7.254921640864953e-07, + "loss": 0.0868, + "step": 960 + }, + { + "epoch": 2.734231041814316, + "grad_norm": 0.7167119979858398, + "learning_rate": 6.55276496757759e-07, + "loss": 0.0866, + "step": 965 + }, + { + "epoch": 2.748405386250886, + "grad_norm": 0.9091411828994751, + "learning_rate": 5.885580832275244e-07, + "loss": 0.0939, + "step": 970 + }, + { + "epoch": 2.7625797306874555, + "grad_norm": 0.8119795918464661, + "learning_rate": 5.253531894924962e-07, + "loss": 0.1014, + "step": 975 + }, + { + "epoch": 2.7767540751240256, + "grad_norm": 0.727209210395813, + "learning_rate": 4.6567722495074685e-07, + "loss": 0.0973, + "step": 980 + }, + { + "epoch": 2.7909284195605952, + "grad_norm": 0.7903998494148254, + "learning_rate": 4.0954473864489693e-07, + "loss": 0.0961, + "step": 985 + }, + { + "epoch": 2.8051027639971653, + "grad_norm": 0.8566182255744934, + "learning_rate": 3.5696941571505436e-07, + "loss": 0.1019, + "step": 990 + }, + { + "epoch": 2.819277108433735, + "grad_norm": 0.6877623796463013, + "learning_rate": 3.079640740623679e-07, + "loss": 0.0774, + "step": 995 + }, + { + "epoch": 2.833451452870305, + "grad_norm": 0.6469544172286987, + "learning_rate": 2.625406612240039e-07, + "loss": 0.1023, + "step": 1000 + }, + { + "epoch": 2.8476257973068746, + "grad_norm": 0.8671718239784241, + "learning_rate": 2.207102514603393e-07, + "loss": 0.1061, + "step": 1005 + }, + { + "epoch": 2.861800141743444, + "grad_norm": 0.7091161012649536, + "learning_rate": 1.8248304305504505e-07, + "loss": 0.0833, + "step": 1010 + }, + { + "epoch": 2.8759744861800143, + "grad_norm": 0.821736216545105, + "learning_rate": 1.4786835582873137e-07, + "loss": 0.1217, + "step": 1015 + }, + { + "epoch": 2.890148830616584, + "grad_norm": 0.7606230974197388, + "learning_rate": 1.1687462886677713e-07, + "loss": 0.1028, + "step": 1020 + }, + { + "epoch": 2.904323175053154, + "grad_norm": 0.7250263690948486, + "learning_rate": 8.950941846187721e-08, + "loss": 0.1041, + "step": 1025 + }, + { + "epoch": 2.9184975194897236, + "grad_norm": 0.7371218204498291, + "learning_rate": 6.577939627179785e-08, + "loss": 0.0976, + "step": 1030 + }, + { + "epoch": 2.932671863926293, + "grad_norm": 0.6483851075172424, + "learning_rate": 4.5690347692837755e-08, + "loss": 0.101, + "step": 1035 + }, + { + "epoch": 2.9468462083628633, + "grad_norm": 0.8261492848396301, + "learning_rate": 2.9247170449338e-08, + "loss": 0.0987, + "step": 1040 + }, + { + "epoch": 2.961020552799433, + "grad_norm": 0.7786318063735962, + "learning_rate": 1.6453873399610576e-08, + "loss": 0.0933, + "step": 1045 + }, + { + "epoch": 2.975194897236003, + "grad_norm": 0.7456707954406738, + "learning_rate": 7.313575558583474e-09, + "loss": 0.0875, + "step": 1050 + }, + { + "epoch": 2.9893692416725726, + "grad_norm": 0.8292640447616577, + "learning_rate": 1.8285053373706673e-09, + "loss": 0.1032, + "step": 1055 + }, + { + "epoch": 3.0, + "step": 1059, + "total_flos": 1.3742123318855598e+18, + "train_loss": 0.4448279767130995, + "train_runtime": 1090.317, + "train_samples_per_second": 31.059, + "train_steps_per_second": 0.971 + } + ], + "logging_steps": 5, + "max_steps": 1059, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3742123318855598e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..29a7446e0e338f999952fe01dab72f2ebd087b31 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/8_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75dfecb8c3ed396efab2e05ef5f099c20ec869e14703ff3a22d823fc199a2e39 +size 8273 diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f048e02029334a7ec60f22629c87dc7a4e7cbb65 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 9_128_e3_3e-5 + results: [] +--- + + + +# 9_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..80379af12f3a01d71abd40b885dc9c3fbc315eb0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "v_proj", + "gate_proj", + "up_proj", + "q_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf4b16496c9938bd1abddbba58a52ed52b2cfcc6 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3694937b736ebfcd9d28e48fcb5a19908ce5997c461f43b9fc806b9652e5158 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..072b7939934359395bc5cd62b7d2d874934b9820 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.290247299649241e+18, + "train_loss": 0.45301662641035956, + "train_runtime": 1031.036, + "train_samples": 10993, + "train_samples_per_second": 31.986, + "train_steps_per_second": 1.001 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..072b7939934359395bc5cd62b7d2d874934b9820 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.290247299649241e+18, + "train_loss": 0.45301662641035956, + "train_runtime": 1031.036, + "train_samples": 10993, + "train_samples_per_second": 31.986, + "train_steps_per_second": 1.001 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0844bfb7950f1abf077abfc7a8146fbbbd2328a1 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1485 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1032, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014545454545454545, + "grad_norm": 0.7264707088470459, + "learning_rate": 2.307692307692308e-06, + "loss": 1.5969, + "step": 5 + }, + { + "epoch": 0.02909090909090909, + "grad_norm": 0.6494286060333252, + "learning_rate": 5.192307692307692e-06, + "loss": 1.5776, + "step": 10 + }, + { + "epoch": 0.04363636363636364, + "grad_norm": 0.5675637125968933, + "learning_rate": 8.076923076923077e-06, + "loss": 1.6802, + "step": 15 + }, + { + "epoch": 0.05818181818181818, + "grad_norm": 0.5521537065505981, + "learning_rate": 1.0961538461538462e-05, + "loss": 1.558, + "step": 20 + }, + { + "epoch": 0.07272727272727272, + "grad_norm": 0.5366525650024414, + "learning_rate": 1.3846153846153847e-05, + "loss": 1.6009, + "step": 25 + }, + { + "epoch": 0.08727272727272728, + "grad_norm": 0.6110736727714539, + "learning_rate": 1.673076923076923e-05, + "loss": 1.6218, + "step": 30 + }, + { + "epoch": 0.10181818181818182, + "grad_norm": 0.4851386547088623, + "learning_rate": 1.9615384615384617e-05, + "loss": 1.541, + "step": 35 + }, + { + "epoch": 0.11636363636363636, + "grad_norm": 0.5681484341621399, + "learning_rate": 2.25e-05, + "loss": 1.5386, + "step": 40 + }, + { + "epoch": 0.13090909090909092, + "grad_norm": 0.5013815760612488, + "learning_rate": 2.5384615384615386e-05, + "loss": 1.5045, + "step": 45 + }, + { + "epoch": 0.14545454545454545, + "grad_norm": 0.5571824908256531, + "learning_rate": 2.8269230769230768e-05, + "loss": 1.4608, + "step": 50 + }, + { + "epoch": 0.16, + "grad_norm": 0.5977470278739929, + "learning_rate": 2.999969170437549e-05, + "loss": 1.4699, + "step": 55 + }, + { + "epoch": 0.17454545454545456, + "grad_norm": 0.6371142864227295, + "learning_rate": 2.99962235241376e-05, + "loss": 1.3543, + "step": 60 + }, + { + "epoch": 0.1890909090909091, + "grad_norm": 0.6023612022399902, + "learning_rate": 2.9988902688106014e-05, + "loss": 1.4186, + "step": 65 + }, + { + "epoch": 0.20363636363636364, + "grad_norm": 0.6394612789154053, + "learning_rate": 2.9977731077065013e-05, + "loss": 1.3716, + "step": 70 + }, + { + "epoch": 0.21818181818181817, + "grad_norm": 0.6529921293258667, + "learning_rate": 2.996271156109531e-05, + "loss": 1.3429, + "step": 75 + }, + { + "epoch": 0.23272727272727273, + "grad_norm": 0.6804682612419128, + "learning_rate": 2.9943847998836723e-05, + "loss": 1.2903, + "step": 80 + }, + { + "epoch": 0.24727272727272728, + "grad_norm": 0.6208245754241943, + "learning_rate": 2.992114523649686e-05, + "loss": 1.2536, + "step": 85 + }, + { + "epoch": 0.26181818181818184, + "grad_norm": 0.6965234875679016, + "learning_rate": 2.9894609106606067e-05, + "loss": 1.2711, + "step": 90 + }, + { + "epoch": 0.27636363636363637, + "grad_norm": 0.8043530583381653, + "learning_rate": 2.9864246426519023e-05, + "loss": 1.2553, + "step": 95 + }, + { + "epoch": 0.2909090909090909, + "grad_norm": 0.7686089277267456, + "learning_rate": 2.983006499666329e-05, + "loss": 1.1818, + "step": 100 + }, + { + "epoch": 0.3054545454545455, + "grad_norm": 0.8069982528686523, + "learning_rate": 2.9792073598535322e-05, + "loss": 1.1375, + "step": 105 + }, + { + "epoch": 0.32, + "grad_norm": 0.8020914196968079, + "learning_rate": 2.9750281992444442e-05, + "loss": 1.1251, + "step": 110 + }, + { + "epoch": 0.33454545454545453, + "grad_norm": 0.9252700209617615, + "learning_rate": 2.970470091500531e-05, + "loss": 1.084, + "step": 115 + }, + { + "epoch": 0.3490909090909091, + "grad_norm": 0.9176790714263916, + "learning_rate": 2.9655342076379596e-05, + "loss": 1.0155, + "step": 120 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.9263909459114075, + "learning_rate": 2.9602218157267572e-05, + "loss": 1.0171, + "step": 125 + }, + { + "epoch": 0.3781818181818182, + "grad_norm": 0.8368505239486694, + "learning_rate": 2.9545342805650304e-05, + "loss": 1.0781, + "step": 130 + }, + { + "epoch": 0.3927272727272727, + "grad_norm": 0.8588261604309082, + "learning_rate": 2.9484730633283385e-05, + "loss": 1.0114, + "step": 135 + }, + { + "epoch": 0.4072727272727273, + "grad_norm": 0.9214907288551331, + "learning_rate": 2.942039721194304e-05, + "loss": 1.1083, + "step": 140 + }, + { + "epoch": 0.4218181818181818, + "grad_norm": 1.1499109268188477, + "learning_rate": 2.935235906942563e-05, + "loss": 1.0128, + "step": 145 + }, + { + "epoch": 0.43636363636363634, + "grad_norm": 0.912686824798584, + "learning_rate": 2.92806336853015e-05, + "loss": 0.9794, + "step": 150 + }, + { + "epoch": 0.4509090909090909, + "grad_norm": 1.0260411500930786, + "learning_rate": 2.920523948642432e-05, + "loss": 0.9644, + "step": 155 + }, + { + "epoch": 0.46545454545454545, + "grad_norm": 1.0155518054962158, + "learning_rate": 2.9126195842197113e-05, + "loss": 0.9489, + "step": 160 + }, + { + "epoch": 0.48, + "grad_norm": 1.1520061492919922, + "learning_rate": 2.904352305959606e-05, + "loss": 0.9365, + "step": 165 + }, + { + "epoch": 0.49454545454545457, + "grad_norm": 1.1494250297546387, + "learning_rate": 2.895724237795347e-05, + "loss": 0.9708, + "step": 170 + }, + { + "epoch": 0.509090909090909, + "grad_norm": 0.9634900689125061, + "learning_rate": 2.8867375963501223e-05, + "loss": 0.9144, + "step": 175 + }, + { + "epoch": 0.5236363636363637, + "grad_norm": 1.371781826019287, + "learning_rate": 2.8773946903676092e-05, + "loss": 0.8395, + "step": 180 + }, + { + "epoch": 0.5381818181818182, + "grad_norm": 1.1032471656799316, + "learning_rate": 2.8676979201188352e-05, + "loss": 0.8596, + "step": 185 + }, + { + "epoch": 0.5527272727272727, + "grad_norm": 1.2435821294784546, + "learning_rate": 2.8576497767855325e-05, + "loss": 0.8507, + "step": 190 + }, + { + "epoch": 0.5672727272727273, + "grad_norm": 1.119550108909607, + "learning_rate": 2.8472528418201283e-05, + "loss": 0.7998, + "step": 195 + }, + { + "epoch": 0.5818181818181818, + "grad_norm": 1.2333793640136719, + "learning_rate": 2.8365097862825516e-05, + "loss": 0.8276, + "step": 200 + }, + { + "epoch": 0.5963636363636363, + "grad_norm": 1.0262360572814941, + "learning_rate": 2.825423370154013e-05, + "loss": 0.7715, + "step": 205 + }, + { + "epoch": 0.610909090909091, + "grad_norm": 1.280503273010254, + "learning_rate": 2.8139964416279427e-05, + "loss": 0.8088, + "step": 210 + }, + { + "epoch": 0.6254545454545455, + "grad_norm": 1.3623501062393188, + "learning_rate": 2.8022319363782676e-05, + "loss": 0.7467, + "step": 215 + }, + { + "epoch": 0.64, + "grad_norm": 1.2655526399612427, + "learning_rate": 2.7901328768052095e-05, + "loss": 0.7826, + "step": 220 + }, + { + "epoch": 0.6545454545454545, + "grad_norm": 1.213940978050232, + "learning_rate": 2.7777023712588064e-05, + "loss": 0.7578, + "step": 225 + }, + { + "epoch": 0.6690909090909091, + "grad_norm": 1.177565336227417, + "learning_rate": 2.7649436132403513e-05, + "loss": 0.6988, + "step": 230 + }, + { + "epoch": 0.6836363636363636, + "grad_norm": 1.2984496355056763, + "learning_rate": 2.7518598805819542e-05, + "loss": 0.7672, + "step": 235 + }, + { + "epoch": 0.6981818181818182, + "grad_norm": 1.288856029510498, + "learning_rate": 2.7384545346044402e-05, + "loss": 0.747, + "step": 240 + }, + { + "epoch": 0.7127272727272728, + "grad_norm": 1.4381195306777954, + "learning_rate": 2.7247310192537978e-05, + "loss": 0.7363, + "step": 245 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 1.3553348779678345, + "learning_rate": 2.7106928602164006e-05, + "loss": 0.6778, + "step": 250 + }, + { + "epoch": 0.7418181818181818, + "grad_norm": 1.1421858072280884, + "learning_rate": 2.696343664013227e-05, + "loss": 0.6254, + "step": 255 + }, + { + "epoch": 0.7563636363636363, + "grad_norm": 1.3648077249526978, + "learning_rate": 2.681687117073317e-05, + "loss": 0.6424, + "step": 260 + }, + { + "epoch": 0.7709090909090909, + "grad_norm": 1.228105068206787, + "learning_rate": 2.666726984786696e-05, + "loss": 0.67, + "step": 265 + }, + { + "epoch": 0.7854545454545454, + "grad_norm": 1.2977365255355835, + "learning_rate": 2.6514671105370166e-05, + "loss": 0.6781, + "step": 270 + }, + { + "epoch": 0.8, + "grad_norm": 1.2062342166900635, + "learning_rate": 2.635911414714158e-05, + "loss": 0.6077, + "step": 275 + }, + { + "epoch": 0.8145454545454546, + "grad_norm": 1.28080415725708, + "learning_rate": 2.6200638937070474e-05, + "loss": 0.607, + "step": 280 + }, + { + "epoch": 0.8290909090909091, + "grad_norm": 1.0646826028823853, + "learning_rate": 2.6039286188769527e-05, + "loss": 0.6369, + "step": 285 + }, + { + "epoch": 0.8436363636363636, + "grad_norm": 1.6239848136901855, + "learning_rate": 2.587509735511516e-05, + "loss": 0.6843, + "step": 290 + }, + { + "epoch": 0.8581818181818182, + "grad_norm": 1.2461433410644531, + "learning_rate": 2.5708114617597946e-05, + "loss": 0.563, + "step": 295 + }, + { + "epoch": 0.8727272727272727, + "grad_norm": 1.3043389320373535, + "learning_rate": 2.553838087548584e-05, + "loss": 0.5892, + "step": 300 + }, + { + "epoch": 0.8872727272727273, + "grad_norm": 1.3801723718643188, + "learning_rate": 2.5365939734802973e-05, + "loss": 0.5592, + "step": 305 + }, + { + "epoch": 0.9018181818181819, + "grad_norm": 1.1704449653625488, + "learning_rate": 2.5190835497126915e-05, + "loss": 0.5428, + "step": 310 + }, + { + "epoch": 0.9163636363636364, + "grad_norm": 1.199084997177124, + "learning_rate": 2.501311314820722e-05, + "loss": 0.5003, + "step": 315 + }, + { + "epoch": 0.9309090909090909, + "grad_norm": 1.3307812213897705, + "learning_rate": 2.4832818346408228e-05, + "loss": 0.5335, + "step": 320 + }, + { + "epoch": 0.9454545454545454, + "grad_norm": 1.2685518264770508, + "learning_rate": 2.4649997410979012e-05, + "loss": 0.4948, + "step": 325 + }, + { + "epoch": 0.96, + "grad_norm": 1.3725152015686035, + "learning_rate": 2.446469731015361e-05, + "loss": 0.5856, + "step": 330 + }, + { + "epoch": 0.9745454545454545, + "grad_norm": 1.234434962272644, + "learning_rate": 2.4276965649084474e-05, + "loss": 0.5011, + "step": 335 + }, + { + "epoch": 0.9890909090909091, + "grad_norm": 1.2778897285461426, + "learning_rate": 2.40868506576123e-05, + "loss": 0.532, + "step": 340 + }, + { + "epoch": 1.002909090909091, + "grad_norm": 1.2850748300552368, + "learning_rate": 2.3894401177875386e-05, + "loss": 0.4732, + "step": 345 + }, + { + "epoch": 1.0174545454545454, + "grad_norm": 1.4895282983779907, + "learning_rate": 2.369966665176168e-05, + "loss": 0.4604, + "step": 350 + }, + { + "epoch": 1.032, + "grad_norm": 1.2257667779922485, + "learning_rate": 2.350269710820675e-05, + "loss": 0.4371, + "step": 355 + }, + { + "epoch": 1.0465454545454544, + "grad_norm": 1.3598411083221436, + "learning_rate": 2.330354315034089e-05, + "loss": 0.4345, + "step": 360 + }, + { + "epoch": 1.061090909090909, + "grad_norm": 1.277349591255188, + "learning_rate": 2.3102255942488804e-05, + "loss": 0.4224, + "step": 365 + }, + { + "epoch": 1.0756363636363637, + "grad_norm": 1.383383870124817, + "learning_rate": 2.2898887197025023e-05, + "loss": 0.3988, + "step": 370 + }, + { + "epoch": 1.0901818181818181, + "grad_norm": 1.3293788433074951, + "learning_rate": 2.2693489161088592e-05, + "loss": 0.4253, + "step": 375 + }, + { + "epoch": 1.1047272727272728, + "grad_norm": 1.252766728401184, + "learning_rate": 2.248611460316031e-05, + "loss": 0.3868, + "step": 380 + }, + { + "epoch": 1.1192727272727272, + "grad_norm": 1.3111717700958252, + "learning_rate": 2.227681679950608e-05, + "loss": 0.3913, + "step": 385 + }, + { + "epoch": 1.1338181818181818, + "grad_norm": 1.346048355102539, + "learning_rate": 2.2065649520489798e-05, + "loss": 0.377, + "step": 390 + }, + { + "epoch": 1.1483636363636363, + "grad_norm": 1.319400668144226, + "learning_rate": 2.1852667016759273e-05, + "loss": 0.3677, + "step": 395 + }, + { + "epoch": 1.162909090909091, + "grad_norm": 1.3299742937088013, + "learning_rate": 2.1637924005308797e-05, + "loss": 0.3532, + "step": 400 + }, + { + "epoch": 1.1774545454545455, + "grad_norm": 1.478053092956543, + "learning_rate": 2.1421475655421887e-05, + "loss": 0.4171, + "step": 405 + }, + { + "epoch": 1.192, + "grad_norm": 1.5179338455200195, + "learning_rate": 2.120337757449781e-05, + "loss": 0.3389, + "step": 410 + }, + { + "epoch": 1.2065454545454546, + "grad_norm": 1.3966190814971924, + "learning_rate": 2.0983685793765626e-05, + "loss": 0.3809, + "step": 415 + }, + { + "epoch": 1.221090909090909, + "grad_norm": 1.336234450340271, + "learning_rate": 2.076245675388924e-05, + "loss": 0.3402, + "step": 420 + }, + { + "epoch": 1.2356363636363636, + "grad_norm": 1.3282886743545532, + "learning_rate": 2.0539747290467348e-05, + "loss": 0.3403, + "step": 425 + }, + { + "epoch": 1.2501818181818183, + "grad_norm": 1.1845920085906982, + "learning_rate": 2.03156146194319e-05, + "loss": 0.3487, + "step": 430 + }, + { + "epoch": 1.2647272727272727, + "grad_norm": 1.5359302759170532, + "learning_rate": 2.0090116322348816e-05, + "loss": 0.3743, + "step": 435 + }, + { + "epoch": 1.2792727272727273, + "grad_norm": 1.5021814107894897, + "learning_rate": 1.9863310331624848e-05, + "loss": 0.3458, + "step": 440 + }, + { + "epoch": 1.2938181818181818, + "grad_norm": 1.5517792701721191, + "learning_rate": 1.963525491562421e-05, + "loss": 0.3301, + "step": 445 + }, + { + "epoch": 1.3083636363636364, + "grad_norm": 1.1594719886779785, + "learning_rate": 1.9406008663698973e-05, + "loss": 0.3081, + "step": 450 + }, + { + "epoch": 1.322909090909091, + "grad_norm": 1.6713287830352783, + "learning_rate": 1.9175630471136952e-05, + "loss": 0.3074, + "step": 455 + }, + { + "epoch": 1.3374545454545455, + "grad_norm": 1.331258773803711, + "learning_rate": 1.894417952403102e-05, + "loss": 0.3102, + "step": 460 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 1.5469774007797241, + "learning_rate": 1.8711715284073715e-05, + "loss": 0.3574, + "step": 465 + }, + { + "epoch": 1.3665454545454545, + "grad_norm": 1.2858690023422241, + "learning_rate": 1.847829747328102e-05, + "loss": 0.3308, + "step": 470 + }, + { + "epoch": 1.3810909090909091, + "grad_norm": 1.509906530380249, + "learning_rate": 1.824398605864925e-05, + "loss": 0.3094, + "step": 475 + }, + { + "epoch": 1.3956363636363636, + "grad_norm": 1.3399333953857422, + "learning_rate": 1.8008841236749092e-05, + "loss": 0.318, + "step": 480 + }, + { + "epoch": 1.4101818181818182, + "grad_norm": 1.6539268493652344, + "learning_rate": 1.7772923418260533e-05, + "loss": 0.3062, + "step": 485 + }, + { + "epoch": 1.4247272727272726, + "grad_norm": 1.32951021194458, + "learning_rate": 1.753629321245288e-05, + "loss": 0.268, + "step": 490 + }, + { + "epoch": 1.4392727272727273, + "grad_norm": 1.1636137962341309, + "learning_rate": 1.7299011411613738e-05, + "loss": 0.2798, + "step": 495 + }, + { + "epoch": 1.453818181818182, + "grad_norm": 1.2749242782592773, + "learning_rate": 1.7061138975430944e-05, + "loss": 0.289, + "step": 500 + }, + { + "epoch": 1.4683636363636363, + "grad_norm": 1.5106180906295776, + "learning_rate": 1.682273701533151e-05, + "loss": 0.3093, + "step": 505 + }, + { + "epoch": 1.482909090909091, + "grad_norm": 1.4261442422866821, + "learning_rate": 1.6583866778781593e-05, + "loss": 0.2563, + "step": 510 + }, + { + "epoch": 1.4974545454545454, + "grad_norm": 1.328650712966919, + "learning_rate": 1.6344589633551502e-05, + "loss": 0.2711, + "step": 515 + }, + { + "epoch": 1.512, + "grad_norm": 1.3403315544128418, + "learning_rate": 1.6104967051949824e-05, + "loss": 0.2452, + "step": 520 + }, + { + "epoch": 1.5265454545454546, + "grad_norm": 1.5379743576049805, + "learning_rate": 1.586506059503062e-05, + "loss": 0.2638, + "step": 525 + }, + { + "epoch": 1.541090909090909, + "grad_norm": 1.343976378440857, + "learning_rate": 1.5624931896777923e-05, + "loss": 0.2526, + "step": 530 + }, + { + "epoch": 1.5556363636363635, + "grad_norm": 1.197616696357727, + "learning_rate": 1.538464264827143e-05, + "loss": 0.2447, + "step": 535 + }, + { + "epoch": 1.5701818181818181, + "grad_norm": 1.4419299364089966, + "learning_rate": 1.5144254581837549e-05, + "loss": 0.2483, + "step": 540 + }, + { + "epoch": 1.5847272727272728, + "grad_norm": 1.6724594831466675, + "learning_rate": 1.4903829455189833e-05, + "loss": 0.2907, + "step": 545 + }, + { + "epoch": 1.5992727272727274, + "grad_norm": 1.1713954210281372, + "learning_rate": 1.4663429035562928e-05, + "loss": 0.2097, + "step": 550 + }, + { + "epoch": 1.6138181818181818, + "grad_norm": 1.3554898500442505, + "learning_rate": 1.4423115083844024e-05, + "loss": 0.2539, + "step": 555 + }, + { + "epoch": 1.6283636363636362, + "grad_norm": 1.2478020191192627, + "learning_rate": 1.4182949338705999e-05, + "loss": 0.2326, + "step": 560 + }, + { + "epoch": 1.6429090909090909, + "grad_norm": 1.432706356048584, + "learning_rate": 1.394299350074619e-05, + "loss": 0.2348, + "step": 565 + }, + { + "epoch": 1.6574545454545455, + "grad_norm": 1.1970672607421875, + "learning_rate": 1.3703309216635049e-05, + "loss": 0.2495, + "step": 570 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 1.5389529466629028, + "learning_rate": 1.346395806327853e-05, + "loss": 0.2476, + "step": 575 + }, + { + "epoch": 1.6865454545454546, + "grad_norm": 1.6304337978363037, + "learning_rate": 1.3225001531998518e-05, + "loss": 0.1998, + "step": 580 + }, + { + "epoch": 1.701090909090909, + "grad_norm": 1.1892495155334473, + "learning_rate": 1.2986501012735174e-05, + "loss": 0.1995, + "step": 585 + }, + { + "epoch": 1.7156363636363636, + "grad_norm": 1.4064244031906128, + "learning_rate": 1.2748517778275314e-05, + "loss": 0.2154, + "step": 590 + }, + { + "epoch": 1.7301818181818183, + "grad_norm": 1.3358298540115356, + "learning_rate": 1.2511112968510988e-05, + "loss": 0.2258, + "step": 595 + }, + { + "epoch": 1.7447272727272727, + "grad_norm": 1.19570791721344, + "learning_rate": 1.2274347574732037e-05, + "loss": 0.189, + "step": 600 + }, + { + "epoch": 1.7592727272727273, + "grad_norm": 1.6295207738876343, + "learning_rate": 1.2038282423956994e-05, + "loss": 0.2267, + "step": 605 + }, + { + "epoch": 1.7738181818181817, + "grad_norm": 1.1397002935409546, + "learning_rate": 1.1802978163306072e-05, + "loss": 0.2232, + "step": 610 + }, + { + "epoch": 1.7883636363636364, + "grad_norm": 1.7461051940917969, + "learning_rate": 1.1568495244420421e-05, + "loss": 0.2174, + "step": 615 + }, + { + "epoch": 1.802909090909091, + "grad_norm": 1.210522174835205, + "learning_rate": 1.1334893907931587e-05, + "loss": 0.1889, + "step": 620 + }, + { + "epoch": 1.8174545454545454, + "grad_norm": 1.4002803564071655, + "learning_rate": 1.1102234167985209e-05, + "loss": 0.1914, + "step": 625 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 1.4791102409362793, + "learning_rate": 1.087057579682284e-05, + "loss": 0.1976, + "step": 630 + }, + { + "epoch": 1.8465454545454545, + "grad_norm": 1.4166333675384521, + "learning_rate": 1.0639978309425997e-05, + "loss": 0.2155, + "step": 635 + }, + { + "epoch": 1.8610909090909091, + "grad_norm": 1.3550053834915161, + "learning_rate": 1.0410500948226247e-05, + "loss": 0.176, + "step": 640 + }, + { + "epoch": 1.8756363636363638, + "grad_norm": 1.3545359373092651, + "learning_rate": 1.0182202667885317e-05, + "loss": 0.1743, + "step": 645 + }, + { + "epoch": 1.8901818181818182, + "grad_norm": 1.2565267086029053, + "learning_rate": 9.955142120149176e-06, + "loss": 0.2164, + "step": 650 + }, + { + "epoch": 1.9047272727272726, + "grad_norm": 1.1921734809875488, + "learning_rate": 9.729377638779859e-06, + "loss": 0.167, + "step": 655 + }, + { + "epoch": 1.9192727272727272, + "grad_norm": 1.4197113513946533, + "learning_rate": 9.5049672245691e-06, + "loss": 0.1849, + "step": 660 + }, + { + "epoch": 1.9338181818181819, + "grad_norm": 1.1598623991012573, + "learning_rate": 9.281968530437374e-06, + "loss": 0.1702, + "step": 665 + }, + { + "epoch": 1.9483636363636365, + "grad_norm": 1.4957537651062012, + "learning_rate": 9.060438846622436e-06, + "loss": 0.2044, + "step": 670 + }, + { + "epoch": 1.962909090909091, + "grad_norm": 1.28658127784729, + "learning_rate": 8.840435085960932e-06, + "loss": 0.1634, + "step": 675 + }, + { + "epoch": 1.9774545454545454, + "grad_norm": 1.53742516040802, + "learning_rate": 8.62201376926703e-06, + "loss": 0.1703, + "step": 680 + }, + { + "epoch": 1.992, + "grad_norm": 1.1815738677978516, + "learning_rate": 8.405231010811771e-06, + "loss": 0.1875, + "step": 685 + }, + { + "epoch": 2.005818181818182, + "grad_norm": 1.4712494611740112, + "learning_rate": 8.190142503906798e-06, + "loss": 0.1449, + "step": 690 + }, + { + "epoch": 2.0203636363636366, + "grad_norm": 1.1502342224121094, + "learning_rate": 7.976803506596316e-06, + "loss": 0.1161, + "step": 695 + }, + { + "epoch": 2.034909090909091, + "grad_norm": 1.1868095397949219, + "learning_rate": 7.765268827460797e-06, + "loss": 0.1205, + "step": 700 + }, + { + "epoch": 2.0494545454545454, + "grad_norm": 1.3847911357879639, + "learning_rate": 7.555592811536254e-06, + "loss": 0.1114, + "step": 705 + }, + { + "epoch": 2.064, + "grad_norm": 1.1378973722457886, + "learning_rate": 7.347829326352459e-06, + "loss": 0.1311, + "step": 710 + }, + { + "epoch": 2.0785454545454547, + "grad_norm": 1.1418230533599854, + "learning_rate": 7.142031748094016e-06, + "loss": 0.1208, + "step": 715 + }, + { + "epoch": 2.093090909090909, + "grad_norm": 1.0068163871765137, + "learning_rate": 6.93825294788751e-06, + "loss": 0.1534, + "step": 720 + }, + { + "epoch": 2.1076363636363635, + "grad_norm": 0.9801628589630127, + "learning_rate": 6.736545278218464e-06, + "loss": 0.1335, + "step": 725 + }, + { + "epoch": 2.122181818181818, + "grad_norm": 1.0094932317733765, + "learning_rate": 6.536960559481605e-06, + "loss": 0.1207, + "step": 730 + }, + { + "epoch": 2.136727272727273, + "grad_norm": 1.2258855104446411, + "learning_rate": 6.339550066667711e-06, + "loss": 0.1439, + "step": 735 + }, + { + "epoch": 2.1512727272727274, + "grad_norm": 1.0061067342758179, + "learning_rate": 6.144364516190662e-06, + "loss": 0.1151, + "step": 740 + }, + { + "epoch": 2.1658181818181816, + "grad_norm": 0.997409999370575, + "learning_rate": 5.951454052857953e-06, + "loss": 0.1163, + "step": 745 + }, + { + "epoch": 2.1803636363636363, + "grad_norm": 1.3681968450546265, + "learning_rate": 5.760868236988102e-06, + "loss": 0.1332, + "step": 750 + }, + { + "epoch": 2.194909090909091, + "grad_norm": 1.0326811075210571, + "learning_rate": 5.572656031678146e-06, + "loss": 0.1127, + "step": 755 + }, + { + "epoch": 2.2094545454545456, + "grad_norm": 1.0966483354568481, + "learning_rate": 5.386865790224638e-06, + "loss": 0.1165, + "step": 760 + }, + { + "epoch": 2.224, + "grad_norm": 1.275245189666748, + "learning_rate": 5.203545243701269e-06, + "loss": 0.1258, + "step": 765 + }, + { + "epoch": 2.2385454545454544, + "grad_norm": 0.8312518000602722, + "learning_rate": 5.022741488696368e-06, + "loss": 0.1235, + "step": 770 + }, + { + "epoch": 2.253090909090909, + "grad_norm": 1.1335837841033936, + "learning_rate": 4.8445009752133615e-06, + "loss": 0.1202, + "step": 775 + }, + { + "epoch": 2.2676363636363637, + "grad_norm": 1.077192783355713, + "learning_rate": 4.668869494737406e-06, + "loss": 0.1231, + "step": 780 + }, + { + "epoch": 2.2821818181818183, + "grad_norm": 1.0837284326553345, + "learning_rate": 4.49589216847118e-06, + "loss": 0.1085, + "step": 785 + }, + { + "epoch": 2.2967272727272725, + "grad_norm": 0.9416809678077698, + "learning_rate": 4.325613435742814e-06, + "loss": 0.105, + "step": 790 + }, + { + "epoch": 2.311272727272727, + "grad_norm": 1.0639972686767578, + "learning_rate": 4.158077042589129e-06, + "loss": 0.0985, + "step": 795 + }, + { + "epoch": 2.325818181818182, + "grad_norm": 1.0494441986083984, + "learning_rate": 3.9933260305168436e-06, + "loss": 0.1182, + "step": 800 + }, + { + "epoch": 2.3403636363636364, + "grad_norm": 0.8486209511756897, + "learning_rate": 3.831402725444897e-06, + "loss": 0.1388, + "step": 805 + }, + { + "epoch": 2.354909090909091, + "grad_norm": 1.093483805656433, + "learning_rate": 3.6723487268305327e-06, + "loss": 0.1095, + "step": 810 + }, + { + "epoch": 2.3694545454545457, + "grad_norm": 0.8943374752998352, + "learning_rate": 3.5162048969820787e-06, + "loss": 0.1205, + "step": 815 + }, + { + "epoch": 2.384, + "grad_norm": 0.9319124221801758, + "learning_rate": 3.3630113505610523e-06, + "loss": 0.1006, + "step": 820 + }, + { + "epoch": 2.3985454545454545, + "grad_norm": 0.8214781284332275, + "learning_rate": 3.212807444276365e-06, + "loss": 0.1, + "step": 825 + }, + { + "epoch": 2.413090909090909, + "grad_norm": 1.0242372751235962, + "learning_rate": 3.065631766773286e-06, + "loss": 0.0907, + "step": 830 + }, + { + "epoch": 2.427636363636364, + "grad_norm": 0.9746888875961304, + "learning_rate": 2.921522128719658e-06, + "loss": 0.1019, + "step": 835 + }, + { + "epoch": 2.442181818181818, + "grad_norm": 0.9537249207496643, + "learning_rate": 2.780515553092038e-06, + "loss": 0.0992, + "step": 840 + }, + { + "epoch": 2.4567272727272726, + "grad_norm": 0.9641470909118652, + "learning_rate": 2.642648265664175e-06, + "loss": 0.123, + "step": 845 + }, + { + "epoch": 2.4712727272727273, + "grad_norm": 1.1135350465774536, + "learning_rate": 2.5079556857003256e-06, + "loss": 0.1023, + "step": 850 + }, + { + "epoch": 2.485818181818182, + "grad_norm": 0.7364502549171448, + "learning_rate": 2.3764724168557027e-06, + "loss": 0.0888, + "step": 855 + }, + { + "epoch": 2.5003636363636366, + "grad_norm": 0.8220189213752747, + "learning_rate": 2.248232238286562e-06, + "loss": 0.1053, + "step": 860 + }, + { + "epoch": 2.5149090909090908, + "grad_norm": 0.759765625, + "learning_rate": 2.1232680959720085e-06, + "loss": 0.1017, + "step": 865 + }, + { + "epoch": 2.5294545454545454, + "grad_norm": 0.744757354259491, + "learning_rate": 2.00161209424992e-06, + "loss": 0.1082, + "step": 870 + }, + { + "epoch": 2.544, + "grad_norm": 0.8105723261833191, + "learning_rate": 1.8832954875690656e-06, + "loss": 0.0953, + "step": 875 + }, + { + "epoch": 2.5585454545454547, + "grad_norm": 0.9727612137794495, + "learning_rate": 1.768348672459575e-06, + "loss": 0.1111, + "step": 880 + }, + { + "epoch": 2.573090909090909, + "grad_norm": 0.8123570084571838, + "learning_rate": 1.6568011797238247e-06, + "loss": 0.1108, + "step": 885 + }, + { + "epoch": 2.5876363636363635, + "grad_norm": 0.9021852612495422, + "learning_rate": 1.5486816668497046e-06, + "loss": 0.1046, + "step": 890 + }, + { + "epoch": 2.602181818181818, + "grad_norm": 0.9041488170623779, + "learning_rate": 1.4440179106482987e-06, + "loss": 0.1091, + "step": 895 + }, + { + "epoch": 2.616727272727273, + "grad_norm": 0.9290461540222168, + "learning_rate": 1.342836800117762e-06, + "loss": 0.0932, + "step": 900 + }, + { + "epoch": 2.6312727272727274, + "grad_norm": 0.8779100179672241, + "learning_rate": 1.2451643295353238e-06, + "loss": 0.0923, + "step": 905 + }, + { + "epoch": 2.645818181818182, + "grad_norm": 1.0028347969055176, + "learning_rate": 1.151025591779139e-06, + "loss": 0.1164, + "step": 910 + }, + { + "epoch": 2.6603636363636363, + "grad_norm": 0.8429099917411804, + "learning_rate": 1.0604447718817212e-06, + "loss": 0.0976, + "step": 915 + }, + { + "epoch": 2.674909090909091, + "grad_norm": 0.7575308084487915, + "learning_rate": 9.73445140816585e-07, + "loss": 0.0882, + "step": 920 + }, + { + "epoch": 2.6894545454545455, + "grad_norm": 0.8409306406974792, + "learning_rate": 8.900490495197627e-07, + "loss": 0.112, + "step": 925 + }, + { + "epoch": 2.7039999999999997, + "grad_norm": 0.8559815883636475, + "learning_rate": 8.102779231476482e-07, + "loss": 0.0999, + "step": 930 + }, + { + "epoch": 2.7185454545454544, + "grad_norm": 0.7667560577392578, + "learning_rate": 7.341522555726971e-07, + "loss": 0.1007, + "step": 935 + }, + { + "epoch": 2.733090909090909, + "grad_norm": 0.8768170475959778, + "learning_rate": 6.616916041183718e-07, + "loss": 0.1099, + "step": 940 + }, + { + "epoch": 2.7476363636363637, + "grad_norm": 0.8255895972251892, + "learning_rate": 5.929145845347106e-07, + "loss": 0.0984, + "step": 945 + }, + { + "epoch": 2.7621818181818183, + "grad_norm": 0.8067662119865417, + "learning_rate": 5.278388662157846e-07, + "loss": 0.0868, + "step": 950 + }, + { + "epoch": 2.776727272727273, + "grad_norm": 0.7714850902557373, + "learning_rate": 4.6648116766027095e-07, + "loss": 0.0877, + "step": 955 + }, + { + "epoch": 2.791272727272727, + "grad_norm": 0.7739673852920532, + "learning_rate": 4.0885725217634337e-07, + "loss": 0.09, + "step": 960 + }, + { + "epoch": 2.8058181818181818, + "grad_norm": 0.9548351764678955, + "learning_rate": 3.549819238319385e-07, + "loss": 0.0864, + "step": 965 + }, + { + "epoch": 2.8203636363636364, + "grad_norm": 0.7386201024055481, + "learning_rate": 3.0486902365146787e-07, + "loss": 0.0934, + "step": 970 + }, + { + "epoch": 2.834909090909091, + "grad_norm": 1.0041216611862183, + "learning_rate": 2.585314260599425e-07, + "loss": 0.102, + "step": 975 + }, + { + "epoch": 2.8494545454545452, + "grad_norm": 0.9662683606147766, + "learning_rate": 2.1598103557542715e-07, + "loss": 0.0805, + "step": 980 + }, + { + "epoch": 2.864, + "grad_norm": 0.7833965420722961, + "learning_rate": 1.7722878375066475e-07, + "loss": 0.1036, + "step": 985 + }, + { + "epoch": 2.8785454545454545, + "grad_norm": 0.8982352614402771, + "learning_rate": 1.4228462636467487e-07, + "loss": 0.1006, + "step": 990 + }, + { + "epoch": 2.893090909090909, + "grad_norm": 0.6871171593666077, + "learning_rate": 1.111575408650245e-07, + "loss": 0.0968, + "step": 995 + }, + { + "epoch": 2.907636363636364, + "grad_norm": 0.7357356548309326, + "learning_rate": 8.385552406145302e-08, + "loss": 0.0836, + "step": 1000 + }, + { + "epoch": 2.9221818181818184, + "grad_norm": 0.9489333629608154, + "learning_rate": 6.038559007141397e-08, + "loss": 0.089, + "step": 1005 + }, + { + "epoch": 2.9367272727272726, + "grad_norm": 0.8098792433738708, + "learning_rate": 4.075376851810308e-08, + "loss": 0.0961, + "step": 1010 + }, + { + "epoch": 2.9512727272727273, + "grad_norm": 0.8689836859703064, + "learning_rate": 2.4965102981387432e-08, + "loss": 0.0965, + "step": 1015 + }, + { + "epoch": 2.965818181818182, + "grad_norm": 0.7752353549003601, + "learning_rate": 1.3023649702066931e-08, + "loss": 0.086, + "step": 1020 + }, + { + "epoch": 2.980363636363636, + "grad_norm": 0.761755108833313, + "learning_rate": 4.9324765398028884e-09, + "loss": 0.1024, + "step": 1025 + }, + { + "epoch": 2.9949090909090907, + "grad_norm": 0.8055518865585327, + "learning_rate": 6.936621849451541e-10, + "loss": 0.0868, + "step": 1030 + }, + { + "epoch": 3.0, + "step": 1032, + "total_flos": 1.290247299649241e+18, + "train_loss": 0.45301662641035956, + "train_runtime": 1031.036, + "train_samples_per_second": 31.986, + "train_steps_per_second": 1.001 + } + ], + "logging_steps": 5, + "max_steps": 1032, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.290247299649241e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0f10f1cfcdea9e43f983832373cae0677207c41 --- /dev/null +++ b/hotpotqa_train_knowledge_50_base/9_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f744cfca0f3324978eb22485e882926d287d8b7c89c33dcd52c06fbf54d6b0e4 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ee81c775ab17bd950d55a4f9fceaf521933bba01 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..691e14e93b435f0b794a6485af68be8a500f103a --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "gate_proj", + "v_proj", + "up_proj", + "q_proj", + "o_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e8a2924a956676cc131a37dbf608a4b99fa040a --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8df274471dab7c20a5c87f4d2e7675b820bc11aadd73279f442e24fc9bfc7d3c +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2258a9c5c228ea7081969e44d05e71c17c2a0bdb --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2487673941600502e+18, + "train_loss": 0.41914580349191355, + "train_runtime": 553.5753, + "train_samples": 10926, + "train_samples_per_second": 59.211, + "train_steps_per_second": 1.853 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2258a9c5c228ea7081969e44d05e71c17c2a0bdb --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2487673941600502e+18, + "train_loss": 0.41914580349191355, + "train_runtime": 553.5753, + "train_samples": 10926, + "train_samples_per_second": 59.211, + "train_steps_per_second": 1.853 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a273588492015e6f3f0de29c08b220d18c8f427b --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1478 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1026, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014641288433382138, + "grad_norm": 0.7126476764678955, + "learning_rate": 2.307692307692308e-06, + "loss": 1.6933, + "step": 5 + }, + { + "epoch": 0.029282576866764276, + "grad_norm": 0.639087438583374, + "learning_rate": 5.192307692307692e-06, + "loss": 1.6953, + "step": 10 + }, + { + "epoch": 0.043923865300146414, + "grad_norm": 0.5422314405441284, + "learning_rate": 8.076923076923077e-06, + "loss": 1.696, + "step": 15 + }, + { + "epoch": 0.05856515373352855, + "grad_norm": 0.5153018832206726, + "learning_rate": 1.0961538461538462e-05, + "loss": 1.6229, + "step": 20 + }, + { + "epoch": 0.07320644216691069, + "grad_norm": 0.5513389110565186, + "learning_rate": 1.3846153846153847e-05, + "loss": 1.6007, + "step": 25 + }, + { + "epoch": 0.08784773060029283, + "grad_norm": 0.5201866626739502, + "learning_rate": 1.673076923076923e-05, + "loss": 1.6314, + "step": 30 + }, + { + "epoch": 0.10248901903367497, + "grad_norm": 0.47078511118888855, + "learning_rate": 1.9615384615384617e-05, + "loss": 1.6091, + "step": 35 + }, + { + "epoch": 0.1171303074670571, + "grad_norm": 0.47075146436691284, + "learning_rate": 2.25e-05, + "loss": 1.4805, + "step": 40 + }, + { + "epoch": 0.13177159590043924, + "grad_norm": 0.483316034078598, + "learning_rate": 2.5384615384615386e-05, + "loss": 1.5093, + "step": 45 + }, + { + "epoch": 0.14641288433382138, + "grad_norm": 0.5242477059364319, + "learning_rate": 2.8269230769230768e-05, + "loss": 1.5643, + "step": 50 + }, + { + "epoch": 0.16105417276720352, + "grad_norm": 0.49081480503082275, + "learning_rate": 2.9999687894386234e-05, + "loss": 1.4637, + "step": 55 + }, + { + "epoch": 0.17569546120058566, + "grad_norm": 0.6533915996551514, + "learning_rate": 2.9996176855388602e-05, + "loss": 1.3972, + "step": 60 + }, + { + "epoch": 0.1903367496339678, + "grad_norm": 0.5995233058929443, + "learning_rate": 2.9988765561582723e-05, + "loss": 1.3619, + "step": 65 + }, + { + "epoch": 0.20497803806734993, + "grad_norm": 0.685286819934845, + "learning_rate": 2.9977455940522168e-05, + "loss": 1.3187, + "step": 70 + }, + { + "epoch": 0.21961932650073207, + "grad_norm": 0.6270342469215393, + "learning_rate": 2.9962250933650244e-05, + "loss": 1.288, + "step": 75 + }, + { + "epoch": 0.2342606149341142, + "grad_norm": 0.6835029125213623, + "learning_rate": 2.9943154495534944e-05, + "loss": 1.2874, + "step": 80 + }, + { + "epoch": 0.24890190336749635, + "grad_norm": 0.6566820740699768, + "learning_rate": 2.992017159284047e-05, + "loss": 1.2818, + "step": 85 + }, + { + "epoch": 0.2635431918008785, + "grad_norm": 0.7033196091651917, + "learning_rate": 2.9893308203035447e-05, + "loss": 1.1978, + "step": 90 + }, + { + "epoch": 0.2781844802342606, + "grad_norm": 0.7099453806877136, + "learning_rate": 2.9862571312838302e-05, + "loss": 1.1492, + "step": 95 + }, + { + "epoch": 0.29282576866764276, + "grad_norm": 0.7331957221031189, + "learning_rate": 2.982796891640015e-05, + "loss": 1.1423, + "step": 100 + }, + { + "epoch": 0.3074670571010249, + "grad_norm": 0.776106059551239, + "learning_rate": 2.9789510013225617e-05, + "loss": 1.1149, + "step": 105 + }, + { + "epoch": 0.32210834553440704, + "grad_norm": 0.8852845430374146, + "learning_rate": 2.9747204605832248e-05, + "loss": 1.0768, + "step": 110 + }, + { + "epoch": 0.3367496339677892, + "grad_norm": 0.9155844449996948, + "learning_rate": 2.9701063697148998e-05, + "loss": 1.0751, + "step": 115 + }, + { + "epoch": 0.3513909224011713, + "grad_norm": 0.849168598651886, + "learning_rate": 2.965109928765457e-05, + "loss": 1.0465, + "step": 120 + }, + { + "epoch": 0.36603221083455345, + "grad_norm": 0.8271523118019104, + "learning_rate": 2.9597324372256287e-05, + "loss": 1.0078, + "step": 125 + }, + { + "epoch": 0.3806734992679356, + "grad_norm": 0.7945655584335327, + "learning_rate": 2.953975293691032e-05, + "loss": 1.0109, + "step": 130 + }, + { + "epoch": 0.3953147877013177, + "grad_norm": 0.8860719203948975, + "learning_rate": 2.9478399954984196e-05, + "loss": 0.9854, + "step": 135 + }, + { + "epoch": 0.40995607613469986, + "grad_norm": 0.9196994304656982, + "learning_rate": 2.9413281383362467e-05, + "loss": 0.9267, + "step": 140 + }, + { + "epoch": 0.424597364568082, + "grad_norm": 1.0179252624511719, + "learning_rate": 2.9344414158296585e-05, + "loss": 0.9301, + "step": 145 + }, + { + "epoch": 0.43923865300146414, + "grad_norm": 1.0411779880523682, + "learning_rate": 2.9271816191000075e-05, + "loss": 0.8559, + "step": 150 + }, + { + "epoch": 0.4538799414348463, + "grad_norm": 1.033353567123413, + "learning_rate": 2.9195506362990113e-05, + "loss": 0.9518, + "step": 155 + }, + { + "epoch": 0.4685212298682284, + "grad_norm": 1.0611979961395264, + "learning_rate": 2.91155045211768e-05, + "loss": 0.8428, + "step": 160 + }, + { + "epoch": 0.48316251830161056, + "grad_norm": 1.1200586557388306, + "learning_rate": 2.9031831472701248e-05, + "loss": 0.8365, + "step": 165 + }, + { + "epoch": 0.4978038067349927, + "grad_norm": 1.0776511430740356, + "learning_rate": 2.8944508979524044e-05, + "loss": 0.793, + "step": 170 + }, + { + "epoch": 0.5124450951683748, + "grad_norm": 1.0723485946655273, + "learning_rate": 2.885355975276531e-05, + "loss": 0.7683, + "step": 175 + }, + { + "epoch": 0.527086383601757, + "grad_norm": 1.2179584503173828, + "learning_rate": 2.8759007446797916e-05, + "loss": 0.8462, + "step": 180 + }, + { + "epoch": 0.541727672035139, + "grad_norm": 1.1088677644729614, + "learning_rate": 2.8660876653095372e-05, + "loss": 0.7948, + "step": 185 + }, + { + "epoch": 0.5563689604685212, + "grad_norm": 1.0437426567077637, + "learning_rate": 2.8559192893836018e-05, + "loss": 0.7359, + "step": 190 + }, + { + "epoch": 0.5710102489019033, + "grad_norm": 1.1263331174850464, + "learning_rate": 2.84539826152651e-05, + "loss": 0.7012, + "step": 195 + }, + { + "epoch": 0.5856515373352855, + "grad_norm": 1.3113230466842651, + "learning_rate": 2.8345273180816564e-05, + "loss": 0.6952, + "step": 200 + }, + { + "epoch": 0.6002928257686676, + "grad_norm": 1.2912628650665283, + "learning_rate": 2.8233092863996294e-05, + "loss": 0.7685, + "step": 205 + }, + { + "epoch": 0.6149341142020498, + "grad_norm": 1.0843563079833984, + "learning_rate": 2.811747084102862e-05, + "loss": 0.7441, + "step": 210 + }, + { + "epoch": 0.6295754026354319, + "grad_norm": 1.1460975408554077, + "learning_rate": 2.7998437183268078e-05, + "loss": 0.6179, + "step": 215 + }, + { + "epoch": 0.6442166910688141, + "grad_norm": 1.3173315525054932, + "learning_rate": 2.7876022849378377e-05, + "loss": 0.6683, + "step": 220 + }, + { + "epoch": 0.6588579795021962, + "grad_norm": 1.2191557884216309, + "learning_rate": 2.7750259677280527e-05, + "loss": 0.6771, + "step": 225 + }, + { + "epoch": 0.6734992679355783, + "grad_norm": 1.2360891103744507, + "learning_rate": 2.7621180375872376e-05, + "loss": 0.6215, + "step": 230 + }, + { + "epoch": 0.6881405563689604, + "grad_norm": 1.1903656721115112, + "learning_rate": 2.7488818516521524e-05, + "loss": 0.6748, + "step": 235 + }, + { + "epoch": 0.7027818448023426, + "grad_norm": 1.150562047958374, + "learning_rate": 2.7353208524334014e-05, + "loss": 0.6009, + "step": 240 + }, + { + "epoch": 0.7174231332357247, + "grad_norm": 1.267564296722412, + "learning_rate": 2.7214385669200905e-05, + "loss": 0.6921, + "step": 245 + }, + { + "epoch": 0.7320644216691069, + "grad_norm": 1.2788525819778442, + "learning_rate": 2.707238605662518e-05, + "loss": 0.6333, + "step": 250 + }, + { + "epoch": 0.746705710102489, + "grad_norm": 1.1281287670135498, + "learning_rate": 2.692724661833131e-05, + "loss": 0.554, + "step": 255 + }, + { + "epoch": 0.7613469985358712, + "grad_norm": 1.2498953342437744, + "learning_rate": 2.677900510265993e-05, + "loss": 0.6016, + "step": 260 + }, + { + "epoch": 0.7759882869692533, + "grad_norm": 1.2376540899276733, + "learning_rate": 2.6627700064750115e-05, + "loss": 0.5205, + "step": 265 + }, + { + "epoch": 0.7906295754026355, + "grad_norm": 1.396138072013855, + "learning_rate": 2.647337085651184e-05, + "loss": 0.5768, + "step": 270 + }, + { + "epoch": 0.8052708638360175, + "grad_norm": 1.2267885208129883, + "learning_rate": 2.631605761639121e-05, + "loss": 0.5454, + "step": 275 + }, + { + "epoch": 0.8199121522693997, + "grad_norm": 1.1411751508712769, + "learning_rate": 2.6155801258931115e-05, + "loss": 0.5058, + "step": 280 + }, + { + "epoch": 0.8345534407027818, + "grad_norm": 1.2383480072021484, + "learning_rate": 2.5992643464130054e-05, + "loss": 0.5551, + "step": 285 + }, + { + "epoch": 0.849194729136164, + "grad_norm": 1.102262258529663, + "learning_rate": 2.582662666660185e-05, + "loss": 0.5241, + "step": 290 + }, + { + "epoch": 0.8638360175695461, + "grad_norm": 1.2751238346099854, + "learning_rate": 2.5657794044539114e-05, + "loss": 0.5062, + "step": 295 + }, + { + "epoch": 0.8784773060029283, + "grad_norm": 1.335439682006836, + "learning_rate": 2.5486189508483345e-05, + "loss": 0.5005, + "step": 300 + }, + { + "epoch": 0.8931185944363104, + "grad_norm": 1.3290393352508545, + "learning_rate": 2.5311857689904497e-05, + "loss": 0.4967, + "step": 305 + }, + { + "epoch": 0.9077598828696926, + "grad_norm": 1.285927414894104, + "learning_rate": 2.5134843929593113e-05, + "loss": 0.4631, + "step": 310 + }, + { + "epoch": 0.9224011713030746, + "grad_norm": 1.2296510934829712, + "learning_rate": 2.4955194265867916e-05, + "loss": 0.4925, + "step": 315 + }, + { + "epoch": 0.9370424597364568, + "grad_norm": 1.33950936794281, + "learning_rate": 2.4772955422602032e-05, + "loss": 0.4547, + "step": 320 + }, + { + "epoch": 0.9516837481698389, + "grad_norm": 1.101182460784912, + "learning_rate": 2.4588174797070883e-05, + "loss": 0.5063, + "step": 325 + }, + { + "epoch": 0.9663250366032211, + "grad_norm": 1.3146883249282837, + "learning_rate": 2.4400900447624955e-05, + "loss": 0.4582, + "step": 330 + }, + { + "epoch": 0.9809663250366032, + "grad_norm": 1.269517183303833, + "learning_rate": 2.4211181081190606e-05, + "loss": 0.4823, + "step": 335 + }, + { + "epoch": 0.9956076134699854, + "grad_norm": 1.7174859046936035, + "learning_rate": 2.4019066040602245e-05, + "loss": 0.4269, + "step": 340 + }, + { + "epoch": 1.0087847730600292, + "grad_norm": 1.1580455303192139, + "learning_rate": 2.3824605291769043e-05, + "loss": 0.3734, + "step": 345 + }, + { + "epoch": 1.0234260614934114, + "grad_norm": 1.3763182163238525, + "learning_rate": 2.3627849410679667e-05, + "loss": 0.3793, + "step": 350 + }, + { + "epoch": 1.0380673499267936, + "grad_norm": 1.383939504623413, + "learning_rate": 2.3428849570248295e-05, + "loss": 0.3886, + "step": 355 + }, + { + "epoch": 1.0527086383601758, + "grad_norm": 1.2801769971847534, + "learning_rate": 2.322765752700541e-05, + "loss": 0.3456, + "step": 360 + }, + { + "epoch": 1.0673499267935578, + "grad_norm": 1.1529755592346191, + "learning_rate": 2.3024325607636782e-05, + "loss": 0.3625, + "step": 365 + }, + { + "epoch": 1.08199121522694, + "grad_norm": 1.1384326219558716, + "learning_rate": 2.2818906695374164e-05, + "loss": 0.3361, + "step": 370 + }, + { + "epoch": 1.0966325036603222, + "grad_norm": 1.3013032674789429, + "learning_rate": 2.2611454216241273e-05, + "loss": 0.333, + "step": 375 + }, + { + "epoch": 1.1112737920937041, + "grad_norm": 1.1994627714157104, + "learning_rate": 2.2402022125158564e-05, + "loss": 0.3492, + "step": 380 + }, + { + "epoch": 1.1259150805270863, + "grad_norm": 1.2616736888885498, + "learning_rate": 2.2190664891910426e-05, + "loss": 0.3175, + "step": 385 + }, + { + "epoch": 1.1405563689604685, + "grad_norm": 1.147693395614624, + "learning_rate": 2.1977437486978562e-05, + "loss": 0.3556, + "step": 390 + }, + { + "epoch": 1.1551976573938507, + "grad_norm": 1.151597261428833, + "learning_rate": 2.1762395367245036e-05, + "loss": 0.3173, + "step": 395 + }, + { + "epoch": 1.169838945827233, + "grad_norm": 1.3126654624938965, + "learning_rate": 2.1545594461568883e-05, + "loss": 0.3265, + "step": 400 + }, + { + "epoch": 1.1844802342606149, + "grad_norm": 1.1124619245529175, + "learning_rate": 2.1327091156239953e-05, + "loss": 0.3254, + "step": 405 + }, + { + "epoch": 1.199121522693997, + "grad_norm": 1.4071533679962158, + "learning_rate": 2.1106942280313778e-05, + "loss": 0.2756, + "step": 410 + }, + { + "epoch": 1.2137628111273793, + "grad_norm": 1.2208887338638306, + "learning_rate": 2.0885205090831275e-05, + "loss": 0.321, + "step": 415 + }, + { + "epoch": 1.2284040995607612, + "grad_norm": 1.4007987976074219, + "learning_rate": 2.0661937257927164e-05, + "loss": 0.3281, + "step": 420 + }, + { + "epoch": 1.2430453879941434, + "grad_norm": 1.2766481637954712, + "learning_rate": 2.0437196849830908e-05, + "loss": 0.3006, + "step": 425 + }, + { + "epoch": 1.2576866764275256, + "grad_norm": 1.1827428340911865, + "learning_rate": 2.0211042317764158e-05, + "loss": 0.2955, + "step": 430 + }, + { + "epoch": 1.2723279648609078, + "grad_norm": 1.206972599029541, + "learning_rate": 1.9983532480738567e-05, + "loss": 0.2909, + "step": 435 + }, + { + "epoch": 1.28696925329429, + "grad_norm": 1.1884872913360596, + "learning_rate": 1.975472651025793e-05, + "loss": 0.3021, + "step": 440 + }, + { + "epoch": 1.301610541727672, + "grad_norm": 1.3749642372131348, + "learning_rate": 1.952468391492868e-05, + "loss": 0.3018, + "step": 445 + }, + { + "epoch": 1.3162518301610542, + "grad_norm": 1.1364061832427979, + "learning_rate": 1.9293464524982695e-05, + "loss": 0.3096, + "step": 450 + }, + { + "epoch": 1.3308931185944364, + "grad_norm": 1.1458829641342163, + "learning_rate": 1.9061128476716454e-05, + "loss": 0.2676, + "step": 455 + }, + { + "epoch": 1.3455344070278183, + "grad_norm": 1.1433660984039307, + "learning_rate": 1.8827736196850594e-05, + "loss": 0.2578, + "step": 460 + }, + { + "epoch": 1.3601756954612005, + "grad_norm": 1.2813657522201538, + "learning_rate": 1.8593348386813908e-05, + "loss": 0.3068, + "step": 465 + }, + { + "epoch": 1.3748169838945827, + "grad_norm": 1.1621493101119995, + "learning_rate": 1.8358026006955967e-05, + "loss": 0.2544, + "step": 470 + }, + { + "epoch": 1.389458272327965, + "grad_norm": 1.318882942199707, + "learning_rate": 1.8121830260692294e-05, + "loss": 0.282, + "step": 475 + }, + { + "epoch": 1.4040995607613471, + "grad_norm": 1.2115817070007324, + "learning_rate": 1.7884822578586426e-05, + "loss": 0.2281, + "step": 480 + }, + { + "epoch": 1.418740849194729, + "grad_norm": 1.1523452997207642, + "learning_rate": 1.7647064602372828e-05, + "loss": 0.2525, + "step": 485 + }, + { + "epoch": 1.4333821376281113, + "grad_norm": 1.2095608711242676, + "learning_rate": 1.7408618168924913e-05, + "loss": 0.2692, + "step": 490 + }, + { + "epoch": 1.4480234260614935, + "grad_norm": 1.242586374282837, + "learning_rate": 1.7169545294172315e-05, + "loss": 0.2758, + "step": 495 + }, + { + "epoch": 1.4626647144948755, + "grad_norm": 1.1412001848220825, + "learning_rate": 1.6929908156971567e-05, + "loss": 0.2591, + "step": 500 + }, + { + "epoch": 1.4773060029282576, + "grad_norm": 1.1546791791915894, + "learning_rate": 1.668976908293443e-05, + "loss": 0.2521, + "step": 505 + }, + { + "epoch": 1.4919472913616398, + "grad_norm": 1.343634009361267, + "learning_rate": 1.644919052821805e-05, + "loss": 0.2317, + "step": 510 + }, + { + "epoch": 1.506588579795022, + "grad_norm": 1.1909961700439453, + "learning_rate": 1.620823506328113e-05, + "loss": 0.2276, + "step": 515 + }, + { + "epoch": 1.5212298682284042, + "grad_norm": 1.3918815851211548, + "learning_rate": 1.5966965356610438e-05, + "loss": 0.2272, + "step": 520 + }, + { + "epoch": 1.5358711566617862, + "grad_norm": 1.468557357788086, + "learning_rate": 1.5725444158421738e-05, + "loss": 0.2362, + "step": 525 + }, + { + "epoch": 1.5505124450951684, + "grad_norm": 1.0166301727294922, + "learning_rate": 1.5483734284339564e-05, + "loss": 0.2327, + "step": 530 + }, + { + "epoch": 1.5651537335285504, + "grad_norm": 1.3065682649612427, + "learning_rate": 1.524189859905987e-05, + "loss": 0.2289, + "step": 535 + }, + { + "epoch": 1.5797950219619326, + "grad_norm": 1.2681663036346436, + "learning_rate": 1.5e-05, + "loss": 0.2463, + "step": 540 + }, + { + "epoch": 1.5944363103953147, + "grad_norm": 1.1603702306747437, + "learning_rate": 1.4758101400940131e-05, + "loss": 0.23, + "step": 545 + }, + { + "epoch": 1.609077598828697, + "grad_norm": 1.15736722946167, + "learning_rate": 1.4516265715660439e-05, + "loss": 0.2167, + "step": 550 + }, + { + "epoch": 1.6237188872620791, + "grad_norm": 1.1164824962615967, + "learning_rate": 1.427455584157826e-05, + "loss": 0.2289, + "step": 555 + }, + { + "epoch": 1.6383601756954613, + "grad_norm": 1.0983383655548096, + "learning_rate": 1.4033034643389571e-05, + "loss": 0.21, + "step": 560 + }, + { + "epoch": 1.6530014641288433, + "grad_norm": 1.0871089696884155, + "learning_rate": 1.3791764936718871e-05, + "loss": 0.1883, + "step": 565 + }, + { + "epoch": 1.6676427525622255, + "grad_norm": 1.5000489950180054, + "learning_rate": 1.3550809471781956e-05, + "loss": 0.1895, + "step": 570 + }, + { + "epoch": 1.6822840409956075, + "grad_norm": 1.2431613206863403, + "learning_rate": 1.3310230917065569e-05, + "loss": 0.2373, + "step": 575 + }, + { + "epoch": 1.6969253294289897, + "grad_norm": 1.1064904928207397, + "learning_rate": 1.3070091843028436e-05, + "loss": 0.2018, + "step": 580 + }, + { + "epoch": 1.7115666178623719, + "grad_norm": 1.1964831352233887, + "learning_rate": 1.2830454705827687e-05, + "loss": 0.2092, + "step": 585 + }, + { + "epoch": 1.726207906295754, + "grad_norm": 1.2428889274597168, + "learning_rate": 1.2591381831075091e-05, + "loss": 0.1936, + "step": 590 + }, + { + "epoch": 1.7408491947291362, + "grad_norm": 1.1626858711242676, + "learning_rate": 1.2352935397627178e-05, + "loss": 0.1782, + "step": 595 + }, + { + "epoch": 1.7554904831625184, + "grad_norm": 1.2806426286697388, + "learning_rate": 1.211517742141358e-05, + "loss": 0.1858, + "step": 600 + }, + { + "epoch": 1.7701317715959004, + "grad_norm": 1.308167815208435, + "learning_rate": 1.187816973930771e-05, + "loss": 0.1758, + "step": 605 + }, + { + "epoch": 1.7847730600292826, + "grad_norm": 1.036094307899475, + "learning_rate": 1.1641973993044039e-05, + "loss": 0.16, + "step": 610 + }, + { + "epoch": 1.7994143484626646, + "grad_norm": 1.2395319938659668, + "learning_rate": 1.1406651613186092e-05, + "loss": 0.1794, + "step": 615 + }, + { + "epoch": 1.8140556368960468, + "grad_norm": 1.110862374305725, + "learning_rate": 1.117226380314941e-05, + "loss": 0.1463, + "step": 620 + }, + { + "epoch": 1.828696925329429, + "grad_norm": 1.1255865097045898, + "learning_rate": 1.0938871523283545e-05, + "loss": 0.1641, + "step": 625 + }, + { + "epoch": 1.8433382137628112, + "grad_norm": 1.2140229940414429, + "learning_rate": 1.0706535475017309e-05, + "loss": 0.1695, + "step": 630 + }, + { + "epoch": 1.8579795021961933, + "grad_norm": 1.2039997577667236, + "learning_rate": 1.0475316085071322e-05, + "loss": 0.195, + "step": 635 + }, + { + "epoch": 1.8726207906295755, + "grad_norm": 1.2272205352783203, + "learning_rate": 1.0245273489742078e-05, + "loss": 0.1469, + "step": 640 + }, + { + "epoch": 1.8872620790629575, + "grad_norm": 1.4279520511627197, + "learning_rate": 1.0016467519261434e-05, + "loss": 0.1471, + "step": 645 + }, + { + "epoch": 1.9019033674963397, + "grad_norm": 1.1784698963165283, + "learning_rate": 9.788957682235845e-06, + "loss": 0.1526, + "step": 650 + }, + { + "epoch": 1.9165446559297217, + "grad_norm": 1.3334534168243408, + "learning_rate": 9.562803150169093e-06, + "loss": 0.1667, + "step": 655 + }, + { + "epoch": 1.9311859443631039, + "grad_norm": 1.1824780702590942, + "learning_rate": 9.338062742072837e-06, + "loss": 0.1352, + "step": 660 + }, + { + "epoch": 1.945827232796486, + "grad_norm": 1.076136827468872, + "learning_rate": 9.114794909168728e-06, + "loss": 0.1483, + "step": 665 + }, + { + "epoch": 1.9604685212298683, + "grad_norm": 1.2465894222259521, + "learning_rate": 8.893057719686225e-06, + "loss": 0.1611, + "step": 670 + }, + { + "epoch": 1.9751098096632504, + "grad_norm": 1.0261048078536987, + "learning_rate": 8.672908843760047e-06, + "loss": 0.133, + "step": 675 + }, + { + "epoch": 1.9897510980966326, + "grad_norm": 1.117666244506836, + "learning_rate": 8.454405538431125e-06, + "loss": 0.1461, + "step": 680 + }, + { + "epoch": 2.0029282576866763, + "grad_norm": 0.9117185473442078, + "learning_rate": 8.237604632754968e-06, + "loss": 0.1206, + "step": 685 + }, + { + "epoch": 2.0175695461200585, + "grad_norm": 1.0734974145889282, + "learning_rate": 8.022562513021443e-06, + "loss": 0.1068, + "step": 690 + }, + { + "epoch": 2.0322108345534406, + "grad_norm": 0.9563935399055481, + "learning_rate": 7.809335108089575e-06, + "loss": 0.1108, + "step": 695 + }, + { + "epoch": 2.046852122986823, + "grad_norm": 0.8566833138465881, + "learning_rate": 7.597977874841444e-06, + "loss": 0.1199, + "step": 700 + }, + { + "epoch": 2.061493411420205, + "grad_norm": 1.1050159931182861, + "learning_rate": 7.388545783758727e-06, + "loss": 0.1314, + "step": 705 + }, + { + "epoch": 2.0761346998535872, + "grad_norm": 1.112433671951294, + "learning_rate": 7.181093304625841e-06, + "loss": 0.1179, + "step": 710 + }, + { + "epoch": 2.0907759882869694, + "grad_norm": 0.9545454978942871, + "learning_rate": 6.975674392363223e-06, + "loss": 0.1165, + "step": 715 + }, + { + "epoch": 2.1054172767203516, + "grad_norm": 0.8509197235107422, + "learning_rate": 6.772342472994596e-06, + "loss": 0.0913, + "step": 720 + }, + { + "epoch": 2.1200585651537334, + "grad_norm": 0.922893762588501, + "learning_rate": 6.571150429751707e-06, + "loss": 0.0817, + "step": 725 + }, + { + "epoch": 2.1346998535871156, + "grad_norm": 0.8538981676101685, + "learning_rate": 6.372150589320339e-06, + "loss": 0.1085, + "step": 730 + }, + { + "epoch": 2.1493411420204978, + "grad_norm": 0.89898282289505, + "learning_rate": 6.1753947082309605e-06, + "loss": 0.1138, + "step": 735 + }, + { + "epoch": 2.16398243045388, + "grad_norm": 1.2213102579116821, + "learning_rate": 5.9809339593977555e-06, + "loss": 0.115, + "step": 740 + }, + { + "epoch": 2.178623718887262, + "grad_norm": 1.0277119874954224, + "learning_rate": 5.788818918809395e-06, + "loss": 0.1107, + "step": 745 + }, + { + "epoch": 2.1932650073206443, + "grad_norm": 0.9796687960624695, + "learning_rate": 5.599099552375048e-06, + "loss": 0.1034, + "step": 750 + }, + { + "epoch": 2.2079062957540265, + "grad_norm": 0.8411888480186462, + "learning_rate": 5.411825202929121e-06, + "loss": 0.1121, + "step": 755 + }, + { + "epoch": 2.2225475841874083, + "grad_norm": 0.9728362560272217, + "learning_rate": 5.227044577397972e-06, + "loss": 0.0962, + "step": 760 + }, + { + "epoch": 2.2371888726207905, + "grad_norm": 0.7702454328536987, + "learning_rate": 5.044805734132092e-06, + "loss": 0.0965, + "step": 765 + }, + { + "epoch": 2.2518301610541727, + "grad_norm": 0.8797982931137085, + "learning_rate": 4.865156070406892e-06, + "loss": 0.0838, + "step": 770 + }, + { + "epoch": 2.266471449487555, + "grad_norm": 0.6953938603401184, + "learning_rate": 4.6881423100955026e-06, + "loss": 0.1051, + "step": 775 + }, + { + "epoch": 2.281112737920937, + "grad_norm": 1.0766493082046509, + "learning_rate": 4.513810491516659e-06, + "loss": 0.1094, + "step": 780 + }, + { + "epoch": 2.2957540263543192, + "grad_norm": 0.8407049179077148, + "learning_rate": 4.342205955460885e-06, + "loss": 0.1022, + "step": 785 + }, + { + "epoch": 2.3103953147877014, + "grad_norm": 0.8497702479362488, + "learning_rate": 4.173373333398154e-06, + "loss": 0.1059, + "step": 790 + }, + { + "epoch": 2.3250366032210836, + "grad_norm": 1.062410831451416, + "learning_rate": 4.007356535869946e-06, + "loss": 0.1014, + "step": 795 + }, + { + "epoch": 2.339677891654466, + "grad_norm": 0.9748228788375854, + "learning_rate": 3.844198741068887e-06, + "loss": 0.101, + "step": 800 + }, + { + "epoch": 2.3543191800878476, + "grad_norm": 0.7262164354324341, + "learning_rate": 3.6839423836087928e-06, + "loss": 0.0883, + "step": 805 + }, + { + "epoch": 2.3689604685212298, + "grad_norm": 0.6985094547271729, + "learning_rate": 3.5266291434881648e-06, + "loss": 0.0931, + "step": 810 + }, + { + "epoch": 2.383601756954612, + "grad_norm": 0.8169254660606384, + "learning_rate": 3.3722999352498868e-06, + "loss": 0.1013, + "step": 815 + }, + { + "epoch": 2.398243045387994, + "grad_norm": 1.660681128501892, + "learning_rate": 3.220994897340067e-06, + "loss": 0.0948, + "step": 820 + }, + { + "epoch": 2.4128843338213763, + "grad_norm": 0.8122191429138184, + "learning_rate": 3.07275338166869e-06, + "loss": 0.0776, + "step": 825 + }, + { + "epoch": 2.4275256222547585, + "grad_norm": 0.9903194904327393, + "learning_rate": 2.9276139433748227e-06, + "loss": 0.0849, + "step": 830 + }, + { + "epoch": 2.4421669106881407, + "grad_norm": 0.9399380087852478, + "learning_rate": 2.785614330799101e-06, + "loss": 0.0856, + "step": 835 + }, + { + "epoch": 2.4568081991215225, + "grad_norm": 0.8150906562805176, + "learning_rate": 2.6467914756659884e-06, + "loss": 0.0939, + "step": 840 + }, + { + "epoch": 2.4714494875549047, + "grad_norm": 0.9147027134895325, + "learning_rate": 2.5111814834784767e-06, + "loss": 0.0875, + "step": 845 + }, + { + "epoch": 2.486090775988287, + "grad_norm": 0.6806795597076416, + "learning_rate": 2.3788196241276246e-06, + "loss": 0.0988, + "step": 850 + }, + { + "epoch": 2.500732064421669, + "grad_norm": 0.8591852188110352, + "learning_rate": 2.24974032271947e-06, + "loss": 0.0782, + "step": 855 + }, + { + "epoch": 2.5153733528550513, + "grad_norm": 0.8064478039741516, + "learning_rate": 2.1239771506216267e-06, + "loss": 0.0955, + "step": 860 + }, + { + "epoch": 2.5300146412884335, + "grad_norm": 0.8001211881637573, + "learning_rate": 2.0015628167319206e-06, + "loss": 0.0868, + "step": 865 + }, + { + "epoch": 2.5446559297218156, + "grad_norm": 0.7855743765830994, + "learning_rate": 1.8825291589713834e-06, + "loss": 0.0808, + "step": 870 + }, + { + "epoch": 2.559297218155198, + "grad_norm": 0.7871037125587463, + "learning_rate": 1.7669071360037043e-06, + "loss": 0.0715, + "step": 875 + }, + { + "epoch": 2.57393850658858, + "grad_norm": 0.7292036414146423, + "learning_rate": 1.6547268191834364e-06, + "loss": 0.0876, + "step": 880 + }, + { + "epoch": 2.588579795021962, + "grad_norm": 0.7139455676078796, + "learning_rate": 1.546017384734903e-06, + "loss": 0.0847, + "step": 885 + }, + { + "epoch": 2.603221083455344, + "grad_norm": 0.7217434644699097, + "learning_rate": 1.4408071061639822e-06, + "loss": 0.0905, + "step": 890 + }, + { + "epoch": 2.617862371888726, + "grad_norm": 0.792564868927002, + "learning_rate": 1.3391233469046276e-06, + "loss": 0.0904, + "step": 895 + }, + { + "epoch": 2.6325036603221084, + "grad_norm": 0.6080449819564819, + "learning_rate": 1.240992553202085e-06, + "loss": 0.0835, + "step": 900 + }, + { + "epoch": 2.6471449487554906, + "grad_norm": 0.6083067655563354, + "learning_rate": 1.1464402472346914e-06, + "loss": 0.0638, + "step": 905 + }, + { + "epoch": 2.6617862371888728, + "grad_norm": 0.6859203577041626, + "learning_rate": 1.055491020475956e-06, + "loss": 0.0889, + "step": 910 + }, + { + "epoch": 2.6764275256222545, + "grad_norm": 0.8005806803703308, + "learning_rate": 9.681685272987544e-07, + "loss": 0.0785, + "step": 915 + }, + { + "epoch": 2.6910688140556367, + "grad_norm": 0.7106640338897705, + "learning_rate": 8.844954788232012e-07, + "loss": 0.0889, + "step": 920 + }, + { + "epoch": 2.705710102489019, + "grad_norm": 0.6522402763366699, + "learning_rate": 8.044936370098849e-07, + "loss": 0.0718, + "step": 925 + }, + { + "epoch": 2.720351390922401, + "grad_norm": 0.9524693489074707, + "learning_rate": 7.281838089999287e-07, + "loss": 0.0823, + "step": 930 + }, + { + "epoch": 2.7349926793557833, + "grad_norm": 0.6434904932975769, + "learning_rate": 6.555858417034183e-07, + "loss": 0.0839, + "step": 935 + }, + { + "epoch": 2.7496339677891655, + "grad_norm": 0.903883159160614, + "learning_rate": 5.867186166375349e-07, + "loss": 0.0856, + "step": 940 + }, + { + "epoch": 2.7642752562225477, + "grad_norm": 0.6362982392311096, + "learning_rate": 5.216000450158059e-07, + "loss": 0.0911, + "step": 945 + }, + { + "epoch": 2.77891654465593, + "grad_norm": 0.7160278558731079, + "learning_rate": 4.602470630896827e-07, + "loss": 0.0778, + "step": 950 + }, + { + "epoch": 2.793557833089312, + "grad_norm": 0.7719944715499878, + "learning_rate": 4.026756277437155e-07, + "loss": 0.1052, + "step": 955 + }, + { + "epoch": 2.8081991215226942, + "grad_norm": 0.7021519541740417, + "learning_rate": 3.4890071234542856e-07, + "loss": 0.0831, + "step": 960 + }, + { + "epoch": 2.822840409956076, + "grad_norm": 0.6436111927032471, + "learning_rate": 2.989363028510017e-07, + "loss": 0.0731, + "step": 965 + }, + { + "epoch": 2.837481698389458, + "grad_norm": 0.5485305786132812, + "learning_rate": 2.5279539416775356e-07, + "loss": 0.0695, + "step": 970 + }, + { + "epoch": 2.8521229868228404, + "grad_norm": 0.7365350127220154, + "learning_rate": 2.1048998677438315e-07, + "loss": 0.0886, + "step": 975 + }, + { + "epoch": 2.8667642752562226, + "grad_norm": 0.6984142065048218, + "learning_rate": 1.7203108359985176e-07, + "loss": 0.0754, + "step": 980 + }, + { + "epoch": 2.8814055636896048, + "grad_norm": 0.796886682510376, + "learning_rate": 1.3742868716170032e-07, + "loss": 0.085, + "step": 985 + }, + { + "epoch": 2.896046852122987, + "grad_norm": 0.9314534068107605, + "learning_rate": 1.06691796964557e-07, + "loss": 0.0771, + "step": 990 + }, + { + "epoch": 2.9106881405563687, + "grad_norm": 0.5491200685501099, + "learning_rate": 7.982840715953288e-08, + "loss": 0.0694, + "step": 995 + }, + { + "epoch": 2.925329428989751, + "grad_norm": 0.6847761273384094, + "learning_rate": 5.68455044650551e-08, + "loss": 0.0921, + "step": 1000 + }, + { + "epoch": 2.939970717423133, + "grad_norm": 0.7366270422935486, + "learning_rate": 3.774906634975872e-08, + "loss": 0.0767, + "step": 1005 + }, + { + "epoch": 2.9546120058565153, + "grad_norm": 0.7460535764694214, + "learning_rate": 2.2544059477832e-08, + "loss": 0.0917, + "step": 1010 + }, + { + "epoch": 2.9692532942898975, + "grad_norm": 0.7156530022621155, + "learning_rate": 1.1234438417279668e-08, + "loss": 0.079, + "step": 1015 + }, + { + "epoch": 2.9838945827232797, + "grad_norm": 0.7923226356506348, + "learning_rate": 3.8231446113989876e-09, + "loss": 0.098, + "step": 1020 + }, + { + "epoch": 2.998535871156662, + "grad_norm": 0.625125527381897, + "learning_rate": 3.1210561376504574e-10, + "loss": 0.0829, + "step": 1025 + }, + { + "epoch": 3.0, + "step": 1026, + "total_flos": 1.2487673941600502e+18, + "train_loss": 0.41914580349191355, + "train_runtime": 553.5753, + "train_samples_per_second": 59.211, + "train_steps_per_second": 1.853 + } + ], + "logging_steps": 5, + "max_steps": 1026, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2487673941600502e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9581f136d36f78a46243cf3ade42630905334cf7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:693b7dd4b27423603e685ee8f3d7ae015bab55a652ce4548606649be922e50b9 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b19f2f5d994d8c6833bb786cc1fc17105452dd5f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 10_128_e3_3e-5 + results: [] +--- + + + +# 10_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..41f4c48fd465b609d512c1868f47c1326fb60fa2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "q_proj", + "k_proj", + "gate_proj", + "o_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fcec133e3a72a7946b47973ba2b1c6f1c57e0adf --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37321f21d79f2247866935399bbbe08e5bbc796f6d69048aaeab7e39ef9c218e +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a01dff4e6695b8b7ca78e3877646dfe5807c0956 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3653474861006193e+18, + "train_loss": 0.4246323384219708, + "train_runtime": 590.8989, + "train_samples": 11451, + "train_samples_per_second": 58.137, + "train_steps_per_second": 1.818 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a01dff4e6695b8b7ca78e3877646dfe5807c0956 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3653474861006193e+18, + "train_loss": 0.4246323384219708, + "train_runtime": 590.8989, + "train_samples": 11451, + "train_samples_per_second": 58.137, + "train_steps_per_second": 1.818 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..49e749e0952204978758b927aaa33eec0278a918 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1541 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1074, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013966480446927373, + "grad_norm": 0.7203349471092224, + "learning_rate": 2.222222222222222e-06, + "loss": 1.6016, + "step": 5 + }, + { + "epoch": 0.027932960893854747, + "grad_norm": 0.6865078210830688, + "learning_rate": 4.9999999999999996e-06, + "loss": 1.6309, + "step": 10 + }, + { + "epoch": 0.04189944134078212, + "grad_norm": 0.5279502868652344, + "learning_rate": 7.777777777777777e-06, + "loss": 1.545, + "step": 15 + }, + { + "epoch": 0.055865921787709494, + "grad_norm": 0.4921504557132721, + "learning_rate": 1.0555555555555555e-05, + "loss": 1.5208, + "step": 20 + }, + { + "epoch": 0.06983240223463687, + "grad_norm": 0.465522438287735, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.5841, + "step": 25 + }, + { + "epoch": 0.08379888268156424, + "grad_norm": 0.4933748245239258, + "learning_rate": 1.6111111111111115e-05, + "loss": 1.5453, + "step": 30 + }, + { + "epoch": 0.09776536312849161, + "grad_norm": 0.5411198139190674, + "learning_rate": 1.888888888888889e-05, + "loss": 1.5242, + "step": 35 + }, + { + "epoch": 0.11173184357541899, + "grad_norm": 0.5145306587219238, + "learning_rate": 2.1666666666666667e-05, + "loss": 1.4233, + "step": 40 + }, + { + "epoch": 0.12569832402234637, + "grad_norm": 0.44510337710380554, + "learning_rate": 2.4444444444444445e-05, + "loss": 1.4986, + "step": 45 + }, + { + "epoch": 0.13966480446927373, + "grad_norm": 0.49316900968551636, + "learning_rate": 2.7222222222222223e-05, + "loss": 1.3989, + "step": 50 + }, + { + "epoch": 0.15363128491620112, + "grad_norm": 0.644344687461853, + "learning_rate": 3e-05, + "loss": 1.3852, + "step": 55 + }, + { + "epoch": 0.16759776536312848, + "grad_norm": 0.5065838098526001, + "learning_rate": 2.9998221343471153e-05, + "loss": 1.3479, + "step": 60 + }, + { + "epoch": 0.18156424581005587, + "grad_norm": 0.6286799907684326, + "learning_rate": 2.9992885795700492e-05, + "loss": 1.3001, + "step": 65 + }, + { + "epoch": 0.19553072625698323, + "grad_norm": 0.6353201866149902, + "learning_rate": 2.998399462203559e-05, + "loss": 1.3164, + "step": 70 + }, + { + "epoch": 0.20949720670391062, + "grad_norm": 0.6386446356773376, + "learning_rate": 2.9971549931055665e-05, + "loss": 1.2873, + "step": 75 + }, + { + "epoch": 0.22346368715083798, + "grad_norm": 0.6292840242385864, + "learning_rate": 2.9955554674071492e-05, + "loss": 1.2857, + "step": 80 + }, + { + "epoch": 0.23743016759776536, + "grad_norm": 0.6442350149154663, + "learning_rate": 2.9936012644425518e-05, + "loss": 1.2304, + "step": 85 + }, + { + "epoch": 0.25139664804469275, + "grad_norm": 0.6662073731422424, + "learning_rate": 2.991292847659222e-05, + "loss": 1.1773, + "step": 90 + }, + { + "epoch": 0.26536312849162014, + "grad_norm": 0.7874206900596619, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.1839, + "step": 95 + }, + { + "epoch": 0.27932960893854747, + "grad_norm": 0.6279546618461609, + "learning_rate": 2.9856156463128078e-05, + "loss": 1.1501, + "step": 100 + }, + { + "epoch": 0.29329608938547486, + "grad_norm": 0.7541912794113159, + "learning_rate": 2.982248208121889e-05, + "loss": 1.1378, + "step": 105 + }, + { + "epoch": 0.30726256983240224, + "grad_norm": 0.9460755586624146, + "learning_rate": 2.9785292485372714e-05, + "loss": 1.172, + "step": 110 + }, + { + "epoch": 0.32122905027932963, + "grad_norm": 0.7180189490318298, + "learning_rate": 2.974459649525853e-05, + "loss": 1.0674, + "step": 115 + }, + { + "epoch": 0.33519553072625696, + "grad_norm": 0.9715642333030701, + "learning_rate": 2.970040376210148e-05, + "loss": 1.0332, + "step": 120 + }, + { + "epoch": 0.34916201117318435, + "grad_norm": 0.7721325159072876, + "learning_rate": 2.9652724766394012e-05, + "loss": 1.0756, + "step": 125 + }, + { + "epoch": 0.36312849162011174, + "grad_norm": 0.7989616990089417, + "learning_rate": 2.9601570815410394e-05, + "loss": 1.0892, + "step": 130 + }, + { + "epoch": 0.3770949720670391, + "grad_norm": 0.9308871030807495, + "learning_rate": 2.9546954040525144e-05, + "loss": 1.0165, + "step": 135 + }, + { + "epoch": 0.39106145251396646, + "grad_norm": 0.8029282093048096, + "learning_rate": 2.9488887394336025e-05, + "loss": 0.9277, + "step": 140 + }, + { + "epoch": 0.40502793296089384, + "grad_norm": 0.9800552725791931, + "learning_rate": 2.942738464759229e-05, + "loss": 1.0032, + "step": 145 + }, + { + "epoch": 0.41899441340782123, + "grad_norm": 0.878671407699585, + "learning_rate": 2.9362460385928864e-05, + "loss": 0.8848, + "step": 150 + }, + { + "epoch": 0.4329608938547486, + "grad_norm": 0.852571427822113, + "learning_rate": 2.929413000640735e-05, + "loss": 0.9392, + "step": 155 + }, + { + "epoch": 0.44692737430167595, + "grad_norm": 0.9961836338043213, + "learning_rate": 2.922240971386449e-05, + "loss": 0.9392, + "step": 160 + }, + { + "epoch": 0.46089385474860334, + "grad_norm": 0.9165926575660706, + "learning_rate": 2.9147316517069164e-05, + "loss": 0.8621, + "step": 165 + }, + { + "epoch": 0.4748603351955307, + "grad_norm": 1.0740083456039429, + "learning_rate": 2.9068868224688674e-05, + "loss": 0.8476, + "step": 170 + }, + { + "epoch": 0.4888268156424581, + "grad_norm": 0.9501351118087769, + "learning_rate": 2.8987083441065335e-05, + "loss": 0.8535, + "step": 175 + }, + { + "epoch": 0.5027932960893855, + "grad_norm": 0.9711499214172363, + "learning_rate": 2.8901981561804408e-05, + "loss": 0.8177, + "step": 180 + }, + { + "epoch": 0.5167597765363129, + "grad_norm": 1.0689047574996948, + "learning_rate": 2.8813582769174304e-05, + "loss": 0.8476, + "step": 185 + }, + { + "epoch": 0.5307262569832403, + "grad_norm": 0.9738008379936218, + "learning_rate": 2.8721908027320315e-05, + "loss": 0.7869, + "step": 190 + }, + { + "epoch": 0.5446927374301676, + "grad_norm": 1.033265233039856, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.7599, + "step": 195 + }, + { + "epoch": 0.5586592178770949, + "grad_norm": 0.9606071710586548, + "learning_rate": 2.8528818431891495e-05, + "loss": 0.7205, + "step": 200 + }, + { + "epoch": 0.5726256983240223, + "grad_norm": 1.0878839492797852, + "learning_rate": 2.8427449370325938e-05, + "loss": 0.7275, + "step": 205 + }, + { + "epoch": 0.5865921787709497, + "grad_norm": 1.0847512483596802, + "learning_rate": 2.8322895932695272e-05, + "loss": 0.7147, + "step": 210 + }, + { + "epoch": 0.6005586592178771, + "grad_norm": 1.0843349695205688, + "learning_rate": 2.8215182914286768e-05, + "loss": 0.7209, + "step": 215 + }, + { + "epoch": 0.6145251396648045, + "grad_norm": 1.1031627655029297, + "learning_rate": 2.8104335859695545e-05, + "loss": 0.6918, + "step": 220 + }, + { + "epoch": 0.6284916201117319, + "grad_norm": 1.1229658126831055, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.7314, + "step": 225 + }, + { + "epoch": 0.6424581005586593, + "grad_norm": 1.1411434412002563, + "learning_rate": 2.787334553036044e-05, + "loss": 0.6304, + "step": 230 + }, + { + "epoch": 0.6564245810055865, + "grad_norm": 1.2842570543289185, + "learning_rate": 2.7753257035944216e-05, + "loss": 0.6735, + "step": 235 + }, + { + "epoch": 0.6703910614525139, + "grad_norm": 1.0852283239364624, + "learning_rate": 2.763014405300918e-05, + "loss": 0.6465, + "step": 240 + }, + { + "epoch": 0.6843575418994413, + "grad_norm": 1.1048518419265747, + "learning_rate": 2.750403577831679e-05, + "loss": 0.6669, + "step": 245 + }, + { + "epoch": 0.6983240223463687, + "grad_norm": 1.1794778108596802, + "learning_rate": 2.7374962118974533e-05, + "loss": 0.6274, + "step": 250 + }, + { + "epoch": 0.7122905027932961, + "grad_norm": 1.1022047996520996, + "learning_rate": 2.7242953685343327e-05, + "loss": 0.6412, + "step": 255 + }, + { + "epoch": 0.7262569832402235, + "grad_norm": 1.1345787048339844, + "learning_rate": 2.7108041783778144e-05, + "loss": 0.7006, + "step": 260 + }, + { + "epoch": 0.7402234636871509, + "grad_norm": 1.1020519733428955, + "learning_rate": 2.6970258409203596e-05, + "loss": 0.6033, + "step": 265 + }, + { + "epoch": 0.7541899441340782, + "grad_norm": 1.1275442838668823, + "learning_rate": 2.6829636237526175e-05, + "loss": 0.6472, + "step": 270 + }, + { + "epoch": 0.7681564245810056, + "grad_norm": 1.1628135442733765, + "learning_rate": 2.6686208617885057e-05, + "loss": 0.6206, + "step": 275 + }, + { + "epoch": 0.7821229050279329, + "grad_norm": 1.3566250801086426, + "learning_rate": 2.6540009564743186e-05, + "loss": 0.6269, + "step": 280 + }, + { + "epoch": 0.7960893854748603, + "grad_norm": 1.1935744285583496, + "learning_rate": 2.639107374982061e-05, + "loss": 0.5896, + "step": 285 + }, + { + "epoch": 0.8100558659217877, + "grad_norm": 1.1145631074905396, + "learning_rate": 2.623943649387194e-05, + "loss": 0.5567, + "step": 290 + }, + { + "epoch": 0.8240223463687151, + "grad_norm": 1.1931833028793335, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.5497, + "step": 295 + }, + { + "epoch": 0.8379888268156425, + "grad_norm": 1.2803130149841309, + "learning_rate": 2.5928202136676856e-05, + "loss": 0.5865, + "step": 300 + }, + { + "epoch": 0.8519553072625698, + "grad_norm": 1.2806484699249268, + "learning_rate": 2.576867884596663e-05, + "loss": 0.5464, + "step": 305 + }, + { + "epoch": 0.8659217877094972, + "grad_norm": 1.387739658355713, + "learning_rate": 2.5606601717798212e-05, + "loss": 0.5069, + "step": 310 + }, + { + "epoch": 0.8798882681564246, + "grad_norm": 1.2568905353546143, + "learning_rate": 2.5442009189443902e-05, + "loss": 0.4922, + "step": 315 + }, + { + "epoch": 0.8938547486033519, + "grad_norm": 1.127478837966919, + "learning_rate": 2.527494029471371e-05, + "loss": 0.4824, + "step": 320 + }, + { + "epoch": 0.9078212290502793, + "grad_norm": 1.1813393831253052, + "learning_rate": 2.510543465469836e-05, + "loss": 0.5363, + "step": 325 + }, + { + "epoch": 0.9217877094972067, + "grad_norm": 1.3400638103485107, + "learning_rate": 2.4933532468372955e-05, + "loss": 0.4856, + "step": 330 + }, + { + "epoch": 0.9357541899441341, + "grad_norm": 1.227869987487793, + "learning_rate": 2.4759274503063632e-05, + "loss": 0.5087, + "step": 335 + }, + { + "epoch": 0.9497206703910615, + "grad_norm": 1.1825546026229858, + "learning_rate": 2.458270208477942e-05, + "loss": 0.4885, + "step": 340 + }, + { + "epoch": 0.9636871508379888, + "grad_norm": 1.153164029121399, + "learning_rate": 2.44038570884116e-05, + "loss": 0.4924, + "step": 345 + }, + { + "epoch": 0.9776536312849162, + "grad_norm": 1.302517056465149, + "learning_rate": 2.422278192780289e-05, + "loss": 0.5018, + "step": 350 + }, + { + "epoch": 0.9916201117318436, + "grad_norm": 1.2848867177963257, + "learning_rate": 2.4039519545688848e-05, + "loss": 0.4939, + "step": 355 + }, + { + "epoch": 1.005586592178771, + "grad_norm": 1.1800003051757812, + "learning_rate": 2.3854113403513795e-05, + "loss": 0.4171, + "step": 360 + }, + { + "epoch": 1.0195530726256983, + "grad_norm": 1.3128762245178223, + "learning_rate": 2.3666607471123768e-05, + "loss": 0.4747, + "step": 365 + }, + { + "epoch": 1.0335195530726258, + "grad_norm": 1.341719150543213, + "learning_rate": 2.3477046216338877e-05, + "loss": 0.374, + "step": 370 + }, + { + "epoch": 1.047486033519553, + "grad_norm": 1.17770516872406, + "learning_rate": 2.3285474594407588e-05, + "loss": 0.4189, + "step": 375 + }, + { + "epoch": 1.0614525139664805, + "grad_norm": 1.1661663055419922, + "learning_rate": 2.3091938037345373e-05, + "loss": 0.3838, + "step": 380 + }, + { + "epoch": 1.0754189944134078, + "grad_norm": 1.262279748916626, + "learning_rate": 2.2896482443160337e-05, + "loss": 0.3696, + "step": 385 + }, + { + "epoch": 1.089385474860335, + "grad_norm": 1.2113966941833496, + "learning_rate": 2.2699154164968308e-05, + "loss": 0.3835, + "step": 390 + }, + { + "epoch": 1.1033519553072626, + "grad_norm": 1.2418396472930908, + "learning_rate": 2.25e-05, + "loss": 0.4078, + "step": 395 + }, + { + "epoch": 1.1173184357541899, + "grad_norm": 1.1479443311691284, + "learning_rate": 2.2299067178502843e-05, + "loss": 0.3615, + "step": 400 + }, + { + "epoch": 1.1312849162011174, + "grad_norm": 1.185599446296692, + "learning_rate": 2.2096403352540153e-05, + "loss": 0.3904, + "step": 405 + }, + { + "epoch": 1.1452513966480447, + "grad_norm": 1.2507392168045044, + "learning_rate": 2.1892056584690214e-05, + "loss": 0.3689, + "step": 410 + }, + { + "epoch": 1.1592178770949721, + "grad_norm": 1.3928089141845703, + "learning_rate": 2.1686075336648075e-05, + "loss": 0.3411, + "step": 415 + }, + { + "epoch": 1.1731843575418994, + "grad_norm": 1.441921353340149, + "learning_rate": 2.1478508457732617e-05, + "loss": 0.3194, + "step": 420 + }, + { + "epoch": 1.1871508379888267, + "grad_norm": 1.1192326545715332, + "learning_rate": 2.1269405173301752e-05, + "loss": 0.3351, + "step": 425 + }, + { + "epoch": 1.2011173184357542, + "grad_norm": 1.5112532377243042, + "learning_rate": 2.1058815073078425e-05, + "loss": 0.3384, + "step": 430 + }, + { + "epoch": 1.2150837988826815, + "grad_norm": 1.2871603965759277, + "learning_rate": 2.084678809939019e-05, + "loss": 0.2988, + "step": 435 + }, + { + "epoch": 1.229050279329609, + "grad_norm": 1.1816179752349854, + "learning_rate": 2.063337453532519e-05, + "loss": 0.3487, + "step": 440 + }, + { + "epoch": 1.2430167597765363, + "grad_norm": 1.3046737909317017, + "learning_rate": 2.0418624992807297e-05, + "loss": 0.3264, + "step": 445 + }, + { + "epoch": 1.2569832402234637, + "grad_norm": 1.3604315519332886, + "learning_rate": 2.0202590400593287e-05, + "loss": 0.3526, + "step": 450 + }, + { + "epoch": 1.270949720670391, + "grad_norm": 1.2484551668167114, + "learning_rate": 1.9985321992194896e-05, + "loss": 0.303, + "step": 455 + }, + { + "epoch": 1.2849162011173183, + "grad_norm": 1.1718907356262207, + "learning_rate": 1.9766871293728525e-05, + "loss": 0.3068, + "step": 460 + }, + { + "epoch": 1.2988826815642458, + "grad_norm": 1.293911337852478, + "learning_rate": 1.9547290111695654e-05, + "loss": 0.3183, + "step": 465 + }, + { + "epoch": 1.3128491620111733, + "grad_norm": 1.4837592840194702, + "learning_rate": 1.9326630520696683e-05, + "loss": 0.3234, + "step": 470 + }, + { + "epoch": 1.3268156424581006, + "grad_norm": 1.137681484222412, + "learning_rate": 1.9104944851081247e-05, + "loss": 0.2552, + "step": 475 + }, + { + "epoch": 1.3407821229050279, + "grad_norm": 1.3750560283660889, + "learning_rate": 1.888228567653781e-05, + "loss": 0.3128, + "step": 480 + }, + { + "epoch": 1.3547486033519553, + "grad_norm": 1.3349270820617676, + "learning_rate": 1.8658705801625657e-05, + "loss": 0.3106, + "step": 485 + }, + { + "epoch": 1.3687150837988826, + "grad_norm": 1.0409048795700073, + "learning_rate": 1.843425824925201e-05, + "loss": 0.309, + "step": 490 + }, + { + "epoch": 1.3826815642458101, + "grad_norm": 1.148545503616333, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.2602, + "step": 495 + }, + { + "epoch": 1.3966480446927374, + "grad_norm": 1.3204610347747803, + "learning_rate": 1.798297321999255e-05, + "loss": 0.2505, + "step": 500 + }, + { + "epoch": 1.410614525139665, + "grad_norm": 1.1454248428344727, + "learning_rate": 1.775624276724856e-05, + "loss": 0.2519, + "step": 505 + }, + { + "epoch": 1.4245810055865922, + "grad_norm": 1.1186233758926392, + "learning_rate": 1.752885865994549e-05, + "loss": 0.2458, + "step": 510 + }, + { + "epoch": 1.4385474860335195, + "grad_norm": 1.216010332107544, + "learning_rate": 1.7300874823180284e-05, + "loss": 0.2461, + "step": 515 + }, + { + "epoch": 1.452513966480447, + "grad_norm": 1.2609261274337769, + "learning_rate": 1.7072345324278235e-05, + "loss": 0.2636, + "step": 520 + }, + { + "epoch": 1.4664804469273742, + "grad_norm": 1.2027925252914429, + "learning_rate": 1.6843324359970714e-05, + "loss": 0.239, + "step": 525 + }, + { + "epoch": 1.4804469273743017, + "grad_norm": 1.2892686128616333, + "learning_rate": 1.6613866243542173e-05, + "loss": 0.2661, + "step": 530 + }, + { + "epoch": 1.494413407821229, + "grad_norm": 1.0885173082351685, + "learning_rate": 1.638402539194953e-05, + "loss": 0.2101, + "step": 535 + }, + { + "epoch": 1.5083798882681565, + "grad_norm": 1.1277456283569336, + "learning_rate": 1.6153856312916957e-05, + "loss": 0.2173, + "step": 540 + }, + { + "epoch": 1.5223463687150838, + "grad_norm": 1.1598663330078125, + "learning_rate": 1.5923413592009145e-05, + "loss": 0.2366, + "step": 545 + }, + { + "epoch": 1.536312849162011, + "grad_norm": 1.0540968179702759, + "learning_rate": 1.5692751879686097e-05, + "loss": 0.2093, + "step": 550 + }, + { + "epoch": 1.5502793296089385, + "grad_norm": 1.144508957862854, + "learning_rate": 1.5461925878342558e-05, + "loss": 0.2335, + "step": 555 + }, + { + "epoch": 1.564245810055866, + "grad_norm": 1.3423669338226318, + "learning_rate": 1.5230990329335104e-05, + "loss": 0.2402, + "step": 560 + }, + { + "epoch": 1.5782122905027933, + "grad_norm": 1.103735327720642, + "learning_rate": 1.5e-05, + "loss": 0.2117, + "step": 565 + }, + { + "epoch": 1.5921787709497206, + "grad_norm": 1.2496187686920166, + "learning_rate": 1.4769009670664899e-05, + "loss": 0.1846, + "step": 570 + }, + { + "epoch": 1.606145251396648, + "grad_norm": 1.3228400945663452, + "learning_rate": 1.4538074121657448e-05, + "loss": 0.2112, + "step": 575 + }, + { + "epoch": 1.6201117318435754, + "grad_norm": 1.3564728498458862, + "learning_rate": 1.430724812031391e-05, + "loss": 0.2012, + "step": 580 + }, + { + "epoch": 1.6340782122905027, + "grad_norm": 1.2293641567230225, + "learning_rate": 1.4076586407990858e-05, + "loss": 0.2172, + "step": 585 + }, + { + "epoch": 1.6480446927374302, + "grad_norm": 1.3985404968261719, + "learning_rate": 1.3846143687083044e-05, + "loss": 0.2216, + "step": 590 + }, + { + "epoch": 1.6620111731843576, + "grad_norm": 1.0859414339065552, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.2543, + "step": 595 + }, + { + "epoch": 1.675977653631285, + "grad_norm": 1.180662989616394, + "learning_rate": 1.3386133756457831e-05, + "loss": 0.2265, + "step": 600 + }, + { + "epoch": 1.6899441340782122, + "grad_norm": 1.3934826850891113, + "learning_rate": 1.3156675640029292e-05, + "loss": 0.2345, + "step": 605 + }, + { + "epoch": 1.7039106145251397, + "grad_norm": 1.2190419435501099, + "learning_rate": 1.2927654675721771e-05, + "loss": 0.2314, + "step": 610 + }, + { + "epoch": 1.7178770949720672, + "grad_norm": 1.1866998672485352, + "learning_rate": 1.2699125176819717e-05, + "loss": 0.198, + "step": 615 + }, + { + "epoch": 1.7318435754189943, + "grad_norm": 1.1522862911224365, + "learning_rate": 1.2471141340054511e-05, + "loss": 0.2337, + "step": 620 + }, + { + "epoch": 1.7458100558659218, + "grad_norm": 1.1577764749526978, + "learning_rate": 1.2243757232751444e-05, + "loss": 0.1865, + "step": 625 + }, + { + "epoch": 1.7597765363128492, + "grad_norm": 0.993998646736145, + "learning_rate": 1.2017026780007454e-05, + "loss": 0.1983, + "step": 630 + }, + { + "epoch": 1.7737430167597765, + "grad_norm": 1.09307062625885, + "learning_rate": 1.1791003751902542e-05, + "loss": 0.1884, + "step": 635 + }, + { + "epoch": 1.7877094972067038, + "grad_norm": 1.2255231142044067, + "learning_rate": 1.1565741750747992e-05, + "loss": 0.2059, + "step": 640 + }, + { + "epoch": 1.8016759776536313, + "grad_norm": 1.1768251657485962, + "learning_rate": 1.1341294198374342e-05, + "loss": 0.1832, + "step": 645 + }, + { + "epoch": 1.8156424581005588, + "grad_norm": 1.1519455909729004, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1896, + "step": 650 + }, + { + "epoch": 1.829608938547486, + "grad_norm": 1.2007498741149902, + "learning_rate": 1.0895055148918758e-05, + "loss": 0.2065, + "step": 655 + }, + { + "epoch": 1.8435754189944134, + "grad_norm": 1.165770411491394, + "learning_rate": 1.0673369479303316e-05, + "loss": 0.1895, + "step": 660 + }, + { + "epoch": 1.8575418994413408, + "grad_norm": 1.1002002954483032, + "learning_rate": 1.0452709888304348e-05, + "loss": 0.1864, + "step": 665 + }, + { + "epoch": 1.8715083798882681, + "grad_norm": 1.0408258438110352, + "learning_rate": 1.0233128706271476e-05, + "loss": 0.1854, + "step": 670 + }, + { + "epoch": 1.8854748603351954, + "grad_norm": 1.355347990989685, + "learning_rate": 1.0014678007805108e-05, + "loss": 0.1716, + "step": 675 + }, + { + "epoch": 1.899441340782123, + "grad_norm": 1.1723463535308838, + "learning_rate": 9.797409599406709e-06, + "loss": 0.1418, + "step": 680 + }, + { + "epoch": 1.9134078212290504, + "grad_norm": 1.0149585008621216, + "learning_rate": 9.581375007192707e-06, + "loss": 0.1759, + "step": 685 + }, + { + "epoch": 1.9273743016759777, + "grad_norm": 1.475258469581604, + "learning_rate": 9.366625464674812e-06, + "loss": 0.1772, + "step": 690 + }, + { + "epoch": 1.941340782122905, + "grad_norm": 1.1101206541061401, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1397, + "step": 695 + }, + { + "epoch": 1.9553072625698324, + "grad_norm": 1.133748173713684, + "learning_rate": 8.941184926921578e-06, + "loss": 0.1628, + "step": 700 + }, + { + "epoch": 1.9692737430167597, + "grad_norm": 1.1319096088409424, + "learning_rate": 8.730594826698255e-06, + "loss": 0.1482, + "step": 705 + }, + { + "epoch": 1.983240223463687, + "grad_norm": 1.0362194776535034, + "learning_rate": 8.521491542267386e-06, + "loss": 0.1462, + "step": 710 + }, + { + "epoch": 1.9972067039106145, + "grad_norm": 1.2727717161178589, + "learning_rate": 8.313924663351927e-06, + "loss": 0.158, + "step": 715 + }, + { + "epoch": 2.011173184357542, + "grad_norm": 0.9754585027694702, + "learning_rate": 8.107943415309787e-06, + "loss": 0.1422, + "step": 720 + }, + { + "epoch": 2.0251396648044695, + "grad_norm": 1.6227654218673706, + "learning_rate": 7.903596647459851e-06, + "loss": 0.1144, + "step": 725 + }, + { + "epoch": 2.0391061452513966, + "grad_norm": 0.9101579785346985, + "learning_rate": 7.700932821497157e-06, + "loss": 0.1212, + "step": 730 + }, + { + "epoch": 2.053072625698324, + "grad_norm": 0.9358206391334534, + "learning_rate": 7.500000000000004e-06, + "loss": 0.1379, + "step": 735 + }, + { + "epoch": 2.0670391061452515, + "grad_norm": 0.8304013609886169, + "learning_rate": 7.300845835031694e-06, + "loss": 0.1276, + "step": 740 + }, + { + "epoch": 2.0810055865921786, + "grad_norm": 0.9435831904411316, + "learning_rate": 7.103517556839661e-06, + "loss": 0.1252, + "step": 745 + }, + { + "epoch": 2.094972067039106, + "grad_norm": 0.9957636594772339, + "learning_rate": 6.9080619626546276e-06, + "loss": 0.1104, + "step": 750 + }, + { + "epoch": 2.1089385474860336, + "grad_norm": 0.8983755707740784, + "learning_rate": 6.7145254055924136e-06, + "loss": 0.1165, + "step": 755 + }, + { + "epoch": 2.122905027932961, + "grad_norm": 1.007921576499939, + "learning_rate": 6.522953783661122e-06, + "loss": 0.1166, + "step": 760 + }, + { + "epoch": 2.136871508379888, + "grad_norm": 1.086976170539856, + "learning_rate": 6.333392528876234e-06, + "loss": 0.1233, + "step": 765 + }, + { + "epoch": 2.1508379888268156, + "grad_norm": 1.005380630493164, + "learning_rate": 6.145886596486208e-06, + "loss": 0.1339, + "step": 770 + }, + { + "epoch": 2.164804469273743, + "grad_norm": 0.770706057548523, + "learning_rate": 5.960480454311155e-06, + "loss": 0.1042, + "step": 775 + }, + { + "epoch": 2.17877094972067, + "grad_norm": 1.1018253564834595, + "learning_rate": 5.777218072197114e-06, + "loss": 0.1051, + "step": 780 + }, + { + "epoch": 2.1927374301675977, + "grad_norm": 0.8470197319984436, + "learning_rate": 5.596142911588407e-06, + "loss": 0.0943, + "step": 785 + }, + { + "epoch": 2.206703910614525, + "grad_norm": 0.8855159878730774, + "learning_rate": 5.417297915220584e-06, + "loss": 0.1147, + "step": 790 + }, + { + "epoch": 2.2206703910614527, + "grad_norm": 0.9235092997550964, + "learning_rate": 5.240725496936373e-06, + "loss": 0.1077, + "step": 795 + }, + { + "epoch": 2.2346368715083798, + "grad_norm": 1.0513064861297607, + "learning_rate": 5.0664675316270515e-06, + "loss": 0.1172, + "step": 800 + }, + { + "epoch": 2.2486033519553073, + "grad_norm": 0.8240793943405151, + "learning_rate": 4.894565345301642e-06, + "loss": 0.0927, + "step": 805 + }, + { + "epoch": 2.2625698324022347, + "grad_norm": 0.9047719836235046, + "learning_rate": 4.725059705286291e-06, + "loss": 0.0954, + "step": 810 + }, + { + "epoch": 2.276536312849162, + "grad_norm": 0.8584367036819458, + "learning_rate": 4.557990810556102e-06, + "loss": 0.1009, + "step": 815 + }, + { + "epoch": 2.2905027932960893, + "grad_norm": 0.9099041223526001, + "learning_rate": 4.393398282201788e-06, + "loss": 0.1127, + "step": 820 + }, + { + "epoch": 2.304469273743017, + "grad_norm": 0.8258346915245056, + "learning_rate": 4.231321154033372e-06, + "loss": 0.0919, + "step": 825 + }, + { + "epoch": 2.3184357541899443, + "grad_norm": 1.0764776468276978, + "learning_rate": 4.071797863323148e-06, + "loss": 0.1224, + "step": 830 + }, + { + "epoch": 2.3324022346368714, + "grad_norm": 0.883073627948761, + "learning_rate": 3.914866241690115e-06, + "loss": 0.1074, + "step": 835 + }, + { + "epoch": 2.346368715083799, + "grad_norm": 0.871377170085907, + "learning_rate": 3.7605635061280604e-06, + "loss": 0.0889, + "step": 840 + }, + { + "epoch": 2.3603351955307263, + "grad_norm": 1.0565881729125977, + "learning_rate": 3.608926250179392e-06, + "loss": 0.1056, + "step": 845 + }, + { + "epoch": 2.3743016759776534, + "grad_norm": 0.9011804461479187, + "learning_rate": 3.459990435256816e-06, + "loss": 0.1028, + "step": 850 + }, + { + "epoch": 2.388268156424581, + "grad_norm": 0.8661842346191406, + "learning_rate": 3.313791382114943e-06, + "loss": 0.0988, + "step": 855 + }, + { + "epoch": 2.4022346368715084, + "grad_norm": 0.7948212027549744, + "learning_rate": 3.1703637624738254e-06, + "loss": 0.0961, + "step": 860 + }, + { + "epoch": 2.416201117318436, + "grad_norm": 0.8984811305999756, + "learning_rate": 3.0297415907964078e-06, + "loss": 0.1153, + "step": 865 + }, + { + "epoch": 2.430167597765363, + "grad_norm": 0.9244222640991211, + "learning_rate": 2.891958216221857e-06, + "loss": 0.089, + "step": 870 + }, + { + "epoch": 2.4441340782122905, + "grad_norm": 0.8003082871437073, + "learning_rate": 2.757046314656676e-06, + "loss": 0.1086, + "step": 875 + }, + { + "epoch": 2.458100558659218, + "grad_norm": 0.8119866847991943, + "learning_rate": 2.6250378810254673e-06, + "loss": 0.0915, + "step": 880 + }, + { + "epoch": 2.472067039106145, + "grad_norm": 0.9847259521484375, + "learning_rate": 2.495964221683209e-06, + "loss": 0.1086, + "step": 885 + }, + { + "epoch": 2.4860335195530725, + "grad_norm": 0.9984832406044006, + "learning_rate": 2.3698559469908228e-06, + "loss": 0.0957, + "step": 890 + }, + { + "epoch": 2.5, + "grad_norm": 0.6837669014930725, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.1001, + "step": 895 + }, + { + "epoch": 2.5139664804469275, + "grad_norm": 1.742416262626648, + "learning_rate": 2.1266544696395584e-06, + "loss": 0.1082, + "step": 900 + }, + { + "epoch": 2.527932960893855, + "grad_norm": 0.966926634311676, + "learning_rate": 2.0096189432334194e-06, + "loss": 0.0919, + "step": 905 + }, + { + "epoch": 2.541899441340782, + "grad_norm": 0.9170368909835815, + "learning_rate": 1.8956641403044578e-06, + "loss": 0.0877, + "step": 910 + }, + { + "epoch": 2.5558659217877095, + "grad_norm": 0.8274086713790894, + "learning_rate": 1.784817085713233e-06, + "loss": 0.1027, + "step": 915 + }, + { + "epoch": 2.5698324022346366, + "grad_norm": 0.6662862300872803, + "learning_rate": 1.6771040673047271e-06, + "loss": 0.0939, + "step": 920 + }, + { + "epoch": 2.583798882681564, + "grad_norm": 1.1077537536621094, + "learning_rate": 1.5725506296740666e-06, + "loss": 0.1119, + "step": 925 + }, + { + "epoch": 2.5977653631284916, + "grad_norm": 0.845086932182312, + "learning_rate": 1.4711815681085094e-06, + "loss": 0.0966, + "step": 930 + }, + { + "epoch": 2.611731843575419, + "grad_norm": 0.8606322407722473, + "learning_rate": 1.3730209227071439e-06, + "loss": 0.1127, + "step": 935 + }, + { + "epoch": 2.6256983240223466, + "grad_norm": 0.8367630243301392, + "learning_rate": 1.2780919726796846e-06, + "loss": 0.0996, + "step": 940 + }, + { + "epoch": 2.6396648044692737, + "grad_norm": 0.6791664958000183, + "learning_rate": 1.186417230825695e-06, + "loss": 0.0976, + "step": 945 + }, + { + "epoch": 2.653631284916201, + "grad_norm": 0.7007296085357666, + "learning_rate": 1.0980184381955944e-06, + "loss": 0.0845, + "step": 950 + }, + { + "epoch": 2.6675977653631286, + "grad_norm": 0.7359950542449951, + "learning_rate": 1.0129165589346644e-06, + "loss": 0.0851, + "step": 955 + }, + { + "epoch": 2.6815642458100557, + "grad_norm": 0.7112019658088684, + "learning_rate": 9.311317753113319e-07, + "loss": 0.0949, + "step": 960 + }, + { + "epoch": 2.695530726256983, + "grad_norm": 0.8289726376533508, + "learning_rate": 8.526834829308383e-07, + "loss": 0.087, + "step": 965 + }, + { + "epoch": 2.7094972067039107, + "grad_norm": 0.7729262113571167, + "learning_rate": 7.775902861355122e-07, + "loss": 0.0827, + "step": 970 + }, + { + "epoch": 2.723463687150838, + "grad_norm": 0.7763258218765259, + "learning_rate": 7.058699935926527e-07, + "loss": 0.0906, + "step": 975 + }, + { + "epoch": 2.7374301675977653, + "grad_norm": 0.8387095332145691, + "learning_rate": 6.375396140711348e-07, + "loss": 0.0892, + "step": 980 + }, + { + "epoch": 2.7513966480446927, + "grad_norm": 0.7452312707901001, + "learning_rate": 5.726153524077144e-07, + "loss": 0.0809, + "step": 985 + }, + { + "epoch": 2.7653631284916202, + "grad_norm": 0.7580963373184204, + "learning_rate": 5.11112605663977e-07, + "loss": 0.1003, + "step": 990 + }, + { + "epoch": 2.7793296089385473, + "grad_norm": 0.766392707824707, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.0998, + "step": 995 + }, + { + "epoch": 2.793296089385475, + "grad_norm": 0.5791566967964172, + "learning_rate": 3.984291845896071e-07, + "loss": 0.0711, + "step": 1000 + }, + { + "epoch": 2.8072625698324023, + "grad_norm": 0.7512385845184326, + "learning_rate": 3.472752336059898e-07, + "loss": 0.0877, + "step": 1005 + }, + { + "epoch": 2.82122905027933, + "grad_norm": 0.599638819694519, + "learning_rate": 2.995962378985223e-07, + "loss": 0.0758, + "step": 1010 + }, + { + "epoch": 2.835195530726257, + "grad_norm": 0.8511674404144287, + "learning_rate": 2.5540350474147324e-07, + "loss": 0.1026, + "step": 1015 + }, + { + "epoch": 2.8491620111731844, + "grad_norm": 0.9347558617591858, + "learning_rate": 2.1470751462729143e-07, + "loss": 0.093, + "step": 1020 + }, + { + "epoch": 2.863128491620112, + "grad_norm": 0.7446538805961609, + "learning_rate": 1.7751791878110933e-07, + "loss": 0.0861, + "step": 1025 + }, + { + "epoch": 2.877094972067039, + "grad_norm": 0.9024459719657898, + "learning_rate": 1.4384353687192376e-07, + "loss": 0.1002, + "step": 1030 + }, + { + "epoch": 2.8910614525139664, + "grad_norm": 0.7803855538368225, + "learning_rate": 1.1369235492096397e-07, + "loss": 0.1013, + "step": 1035 + }, + { + "epoch": 2.905027932960894, + "grad_norm": 0.6941578388214111, + "learning_rate": 8.707152340778346e-08, + "loss": 0.0849, + "step": 1040 + }, + { + "epoch": 2.9189944134078214, + "grad_norm": 0.7539619207382202, + "learning_rate": 6.398735557448299e-08, + "loss": 0.097, + "step": 1045 + }, + { + "epoch": 2.9329608938547485, + "grad_norm": 0.7197653651237488, + "learning_rate": 4.4445325928506584e-08, + "loss": 0.0795, + "step": 1050 + }, + { + "epoch": 2.946927374301676, + "grad_norm": 0.777992844581604, + "learning_rate": 2.8450068944338436e-08, + "loss": 0.0837, + "step": 1055 + }, + { + "epoch": 2.9608938547486034, + "grad_norm": 0.7011080384254456, + "learning_rate": 1.6005377964413702e-08, + "loss": 0.0856, + "step": 1060 + }, + { + "epoch": 2.9748603351955305, + "grad_norm": 0.7812681794166565, + "learning_rate": 7.114204299511484e-09, + "loss": 0.0854, + "step": 1065 + }, + { + "epoch": 2.988826815642458, + "grad_norm": 0.6603164672851562, + "learning_rate": 1.7786565288463452e-09, + "loss": 0.0757, + "step": 1070 + }, + { + "epoch": 3.0, + "step": 1074, + "total_flos": 1.3653474861006193e+18, + "train_loss": 0.4246323384219708, + "train_runtime": 590.8989, + "train_samples_per_second": 58.137, + "train_steps_per_second": 1.818 + } + ], + "logging_steps": 5, + "max_steps": 1074, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3653474861006193e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e05e7da4be27b16386fd1275ecc1c1dab8a5ff7a --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/10_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e6b837544c318e4d7dbcb167398bf2e13eddf790bbf3472946bc3415bb11a6 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..350201bd7c9173cdb28dd371009e4d37ede90a5d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 11_128_e3_3e-5 + results: [] +--- + + + +# 11_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e517190275abae8fb4f861ebaf4d05f29b72340b --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07fa3431952afe27a76e1ef19c6bd4c921fcc969 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92158a09b0db0cb93ac1cb3ca0d95f5e240d13d5600e3da57531ac95c4febeb9 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6bfdfddd0473e19d97b79a94193438fb06e6e562 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.1998547259110195e+18, + "train_loss": 0.438542250181814, + "train_runtime": 545.6254, + "train_samples": 10189, + "train_samples_per_second": 56.022, + "train_steps_per_second": 1.754 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6bfdfddd0473e19d97b79a94193438fb06e6e562 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.1998547259110195e+18, + "train_loss": 0.438542250181814, + "train_runtime": 545.6254, + "train_samples": 10189, + "train_samples_per_second": 56.022, + "train_steps_per_second": 1.754 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8695260f6ce8f77d1b24f586b97d4c750e1d5308 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1380 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 957, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.015698587127158554, + "grad_norm": 0.6900603771209717, + "learning_rate": 2.4999999999999998e-06, + "loss": 1.6753, + "step": 5 + }, + { + "epoch": 0.03139717425431711, + "grad_norm": 0.6679512858390808, + "learning_rate": 5.625e-06, + "loss": 1.6295, + "step": 10 + }, + { + "epoch": 0.04709576138147567, + "grad_norm": 0.6062449812889099, + "learning_rate": 8.750000000000001e-06, + "loss": 1.6477, + "step": 15 + }, + { + "epoch": 0.06279434850863422, + "grad_norm": 0.45307406783103943, + "learning_rate": 1.1874999999999999e-05, + "loss": 1.5532, + "step": 20 + }, + { + "epoch": 0.07849293563579278, + "grad_norm": 0.9441843032836914, + "learning_rate": 1.5e-05, + "loss": 1.6615, + "step": 25 + }, + { + "epoch": 0.09419152276295134, + "grad_norm": 0.4951711595058441, + "learning_rate": 1.8125e-05, + "loss": 1.542, + "step": 30 + }, + { + "epoch": 0.10989010989010989, + "grad_norm": 0.524151623249054, + "learning_rate": 2.125e-05, + "loss": 1.5148, + "step": 35 + }, + { + "epoch": 0.12558869701726844, + "grad_norm": 0.4440474808216095, + "learning_rate": 2.4375e-05, + "loss": 1.5395, + "step": 40 + }, + { + "epoch": 0.141287284144427, + "grad_norm": 0.45904266834259033, + "learning_rate": 2.75e-05, + "loss": 1.4908, + "step": 45 + }, + { + "epoch": 0.15698587127158556, + "grad_norm": 0.5122482776641846, + "learning_rate": 2.9999910415513213e-05, + "loss": 1.4155, + "step": 50 + }, + { + "epoch": 0.1726844583987441, + "grad_norm": 0.5449596643447876, + "learning_rate": 2.9996775070829517e-05, + "loss": 1.3941, + "step": 55 + }, + { + "epoch": 0.18838304552590268, + "grad_norm": 0.5328635573387146, + "learning_rate": 2.9989161571799668e-05, + "loss": 1.4128, + "step": 60 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.625248908996582, + "learning_rate": 2.997707219187402e-05, + "loss": 1.3774, + "step": 65 + }, + { + "epoch": 0.21978021978021978, + "grad_norm": 0.5328071117401123, + "learning_rate": 2.9960510541036e-05, + "loss": 1.3903, + "step": 70 + }, + { + "epoch": 0.23547880690737832, + "grad_norm": 0.7029002904891968, + "learning_rate": 2.993948156472409e-05, + "loss": 1.3289, + "step": 75 + }, + { + "epoch": 0.25117739403453687, + "grad_norm": 0.5624765753746033, + "learning_rate": 2.9913991542355115e-05, + "loss": 1.2266, + "step": 80 + }, + { + "epoch": 0.2668759811616955, + "grad_norm": 0.6174695491790771, + "learning_rate": 2.988404808544915e-05, + "loss": 1.2063, + "step": 85 + }, + { + "epoch": 0.282574568288854, + "grad_norm": 0.6470711827278137, + "learning_rate": 2.9849660135356648e-05, + "loss": 1.1876, + "step": 90 + }, + { + "epoch": 0.29827315541601257, + "grad_norm": 0.7742108702659607, + "learning_rate": 2.9810837960588506e-05, + "loss": 1.172, + "step": 95 + }, + { + "epoch": 0.3139717425431711, + "grad_norm": 0.7606752514839172, + "learning_rate": 2.976759315374979e-05, + "loss": 1.1812, + "step": 100 + }, + { + "epoch": 0.32967032967032966, + "grad_norm": 0.7596885561943054, + "learning_rate": 2.97199386280781e-05, + "loss": 1.1364, + "step": 105 + }, + { + "epoch": 0.3453689167974882, + "grad_norm": 0.7569558024406433, + "learning_rate": 2.9667888613587565e-05, + "loss": 1.0555, + "step": 110 + }, + { + "epoch": 0.36106750392464676, + "grad_norm": 0.7437660098075867, + "learning_rate": 2.961145865281967e-05, + "loss": 1.024, + "step": 115 + }, + { + "epoch": 0.37676609105180536, + "grad_norm": 0.9708836078643799, + "learning_rate": 2.9550665596202116e-05, + "loss": 1.0956, + "step": 120 + }, + { + "epoch": 0.3924646781789639, + "grad_norm": 0.9559341073036194, + "learning_rate": 2.948552759701715e-05, + "loss": 1.013, + "step": 125 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 0.8150585293769836, + "learning_rate": 2.941606410598087e-05, + "loss": 0.9957, + "step": 130 + }, + { + "epoch": 0.423861852433281, + "grad_norm": 0.8210108280181885, + "learning_rate": 2.9342295865435055e-05, + "loss": 1.0161, + "step": 135 + }, + { + "epoch": 0.43956043956043955, + "grad_norm": 0.9916063547134399, + "learning_rate": 2.926424490315338e-05, + "loss": 0.9916, + "step": 140 + }, + { + "epoch": 0.4552590266875981, + "grad_norm": 0.9624769687652588, + "learning_rate": 2.9181934525763717e-05, + "loss": 0.9128, + "step": 145 + }, + { + "epoch": 0.47095761381475665, + "grad_norm": 1.0506582260131836, + "learning_rate": 2.9095389311788626e-05, + "loss": 0.8164, + "step": 150 + }, + { + "epoch": 0.48665620094191525, + "grad_norm": 1.0165684223175049, + "learning_rate": 2.900463510430598e-05, + "loss": 0.8502, + "step": 155 + }, + { + "epoch": 0.5023547880690737, + "grad_norm": 0.9751114845275879, + "learning_rate": 2.8909699003232043e-05, + "loss": 0.8271, + "step": 160 + }, + { + "epoch": 0.5180533751962323, + "grad_norm": 1.0376160144805908, + "learning_rate": 2.8810609357229226e-05, + "loss": 0.8048, + "step": 165 + }, + { + "epoch": 0.533751962323391, + "grad_norm": 1.0038161277770996, + "learning_rate": 2.870739575524093e-05, + "loss": 0.8468, + "step": 170 + }, + { + "epoch": 0.5494505494505495, + "grad_norm": 0.9729577302932739, + "learning_rate": 2.8600089017656087e-05, + "loss": 0.8363, + "step": 175 + }, + { + "epoch": 0.565149136577708, + "grad_norm": 1.2085906267166138, + "learning_rate": 2.8488721187105934e-05, + "loss": 0.7886, + "step": 180 + }, + { + "epoch": 0.5808477237048666, + "grad_norm": 1.006058692932129, + "learning_rate": 2.8373325518895826e-05, + "loss": 0.7741, + "step": 185 + }, + { + "epoch": 0.5965463108320251, + "grad_norm": 1.1905957460403442, + "learning_rate": 2.8253936471074954e-05, + "loss": 0.7403, + "step": 190 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 1.3676621913909912, + "learning_rate": 2.81305896941469e-05, + "loss": 0.7126, + "step": 195 + }, + { + "epoch": 0.6279434850863422, + "grad_norm": 1.0676138401031494, + "learning_rate": 2.8003322020424126e-05, + "loss": 0.7182, + "step": 200 + }, + { + "epoch": 0.6436420722135008, + "grad_norm": 1.0947225093841553, + "learning_rate": 2.787217145302953e-05, + "loss": 0.6954, + "step": 205 + }, + { + "epoch": 0.6593406593406593, + "grad_norm": 1.1030857563018799, + "learning_rate": 2.7737177154548442e-05, + "loss": 0.7281, + "step": 210 + }, + { + "epoch": 0.6750392464678179, + "grad_norm": 1.1164060831069946, + "learning_rate": 2.7598379435334358e-05, + "loss": 0.7196, + "step": 215 + }, + { + "epoch": 0.6907378335949764, + "grad_norm": 1.176908254623413, + "learning_rate": 2.7455819741471965e-05, + "loss": 0.6513, + "step": 220 + }, + { + "epoch": 0.706436420722135, + "grad_norm": 1.3466682434082031, + "learning_rate": 2.7309540642400998e-05, + "loss": 0.6752, + "step": 225 + }, + { + "epoch": 0.7221350078492935, + "grad_norm": 1.2542909383773804, + "learning_rate": 2.715958581820469e-05, + "loss": 0.6429, + "step": 230 + }, + { + "epoch": 0.7378335949764521, + "grad_norm": 1.2519490718841553, + "learning_rate": 2.7006000046566543e-05, + "loss": 0.67, + "step": 235 + }, + { + "epoch": 0.7535321821036107, + "grad_norm": 1.0745443105697632, + "learning_rate": 2.684882918939937e-05, + "loss": 0.6318, + "step": 240 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.2951083183288574, + "learning_rate": 2.6688120179150563e-05, + "loss": 0.6317, + "step": 245 + }, + { + "epoch": 0.7849293563579278, + "grad_norm": 1.2243515253067017, + "learning_rate": 2.6523921004787707e-05, + "loss": 0.5855, + "step": 250 + }, + { + "epoch": 0.8006279434850864, + "grad_norm": 1.1834031343460083, + "learning_rate": 2.635628069746869e-05, + "loss": 0.6239, + "step": 255 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 1.1265568733215332, + "learning_rate": 2.6185249315900625e-05, + "loss": 0.5832, + "step": 260 + }, + { + "epoch": 0.8320251177394035, + "grad_norm": 1.0771814584732056, + "learning_rate": 2.6010877931391905e-05, + "loss": 0.6005, + "step": 265 + }, + { + "epoch": 0.847723704866562, + "grad_norm": 1.6078089475631714, + "learning_rate": 2.5833218612601937e-05, + "loss": 0.5786, + "step": 270 + }, + { + "epoch": 0.8634222919937206, + "grad_norm": 1.4516563415527344, + "learning_rate": 2.5652324409993034e-05, + "loss": 0.5238, + "step": 275 + }, + { + "epoch": 0.8791208791208791, + "grad_norm": 1.2315969467163086, + "learning_rate": 2.546824933998911e-05, + "loss": 0.4698, + "step": 280 + }, + { + "epoch": 0.8948194662480377, + "grad_norm": 1.3999252319335938, + "learning_rate": 2.5281048368845964e-05, + "loss": 0.586, + "step": 285 + }, + { + "epoch": 0.9105180533751962, + "grad_norm": 1.2517080307006836, + "learning_rate": 2.509077739623793e-05, + "loss": 0.4934, + "step": 290 + }, + { + "epoch": 0.9262166405023547, + "grad_norm": 1.1185775995254517, + "learning_rate": 2.48974932385658e-05, + "loss": 0.4992, + "step": 295 + }, + { + "epoch": 0.9419152276295133, + "grad_norm": 1.1316981315612793, + "learning_rate": 2.470125361199099e-05, + "loss": 0.503, + "step": 300 + }, + { + "epoch": 0.957613814756672, + "grad_norm": 1.551705241203308, + "learning_rate": 2.4502117115201048e-05, + "loss": 0.4761, + "step": 305 + }, + { + "epoch": 0.9733124018838305, + "grad_norm": 1.2394992113113403, + "learning_rate": 2.430014321191163e-05, + "loss": 0.4878, + "step": 310 + }, + { + "epoch": 0.989010989010989, + "grad_norm": 1.171175479888916, + "learning_rate": 2.4095392213110167e-05, + "loss": 0.5518, + "step": 315 + }, + { + "epoch": 1.0031397174254317, + "grad_norm": 1.4029021263122559, + "learning_rate": 2.3887925259046542e-05, + "loss": 0.5298, + "step": 320 + }, + { + "epoch": 1.0188383045525902, + "grad_norm": 1.3655701875686646, + "learning_rate": 2.3677804300976112e-05, + "loss": 0.4236, + "step": 325 + }, + { + "epoch": 1.0345368916797488, + "grad_norm": 1.2356079816818237, + "learning_rate": 2.3465092082660595e-05, + "loss": 0.4326, + "step": 330 + }, + { + "epoch": 1.0502354788069075, + "grad_norm": 1.4036592245101929, + "learning_rate": 2.3249852121632293e-05, + "loss": 0.3647, + "step": 335 + }, + { + "epoch": 1.065934065934066, + "grad_norm": 1.3196923732757568, + "learning_rate": 2.303214869022725e-05, + "loss": 0.429, + "step": 340 + }, + { + "epoch": 1.0816326530612246, + "grad_norm": 1.3066924810409546, + "learning_rate": 2.2812046796393032e-05, + "loss": 0.3936, + "step": 345 + }, + { + "epoch": 1.097331240188383, + "grad_norm": 1.1219220161437988, + "learning_rate": 2.258961216427686e-05, + "loss": 0.3333, + "step": 350 + }, + { + "epoch": 1.1130298273155417, + "grad_norm": 1.2964699268341064, + "learning_rate": 2.2364911214599832e-05, + "loss": 0.377, + "step": 355 + }, + { + "epoch": 1.1287284144427001, + "grad_norm": 1.4458624124526978, + "learning_rate": 2.2138011044823226e-05, + "loss": 0.3985, + "step": 360 + }, + { + "epoch": 1.1444270015698588, + "grad_norm": 1.2792613506317139, + "learning_rate": 2.190897940911262e-05, + "loss": 0.3485, + "step": 365 + }, + { + "epoch": 1.1601255886970172, + "grad_norm": 1.158077597618103, + "learning_rate": 2.1677884698106006e-05, + "loss": 0.3144, + "step": 370 + }, + { + "epoch": 1.1758241758241759, + "grad_norm": 1.2873823642730713, + "learning_rate": 2.1444795918491796e-05, + "loss": 0.363, + "step": 375 + }, + { + "epoch": 1.1915227629513343, + "grad_norm": 1.42474365234375, + "learning_rate": 2.1209782672402877e-05, + "loss": 0.3335, + "step": 380 + }, + { + "epoch": 1.207221350078493, + "grad_norm": 1.241494059562683, + "learning_rate": 2.0972915136632894e-05, + "loss": 0.35, + "step": 385 + }, + { + "epoch": 1.2229199372056514, + "grad_norm": 1.4142056703567505, + "learning_rate": 2.0734264041680886e-05, + "loss": 0.3025, + "step": 390 + }, + { + "epoch": 1.23861852433281, + "grad_norm": 1.2929911613464355, + "learning_rate": 2.049390065063062e-05, + "loss": 0.3347, + "step": 395 + }, + { + "epoch": 1.2543171114599687, + "grad_norm": 1.2655370235443115, + "learning_rate": 2.0251896737870862e-05, + "loss": 0.3494, + "step": 400 + }, + { + "epoch": 1.2700156985871272, + "grad_norm": 1.297495722770691, + "learning_rate": 2.000832456766301e-05, + "loss": 0.3029, + "step": 405 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.599678635597229, + "learning_rate": 1.976325687256239e-05, + "loss": 0.3404, + "step": 410 + }, + { + "epoch": 1.3014128728414442, + "grad_norm": 1.2297910451889038, + "learning_rate": 1.9516766831699767e-05, + "loss": 0.3006, + "step": 415 + }, + { + "epoch": 1.317111459968603, + "grad_norm": 1.1875665187835693, + "learning_rate": 1.9268928048929476e-05, + "loss": 0.2911, + "step": 420 + }, + { + "epoch": 1.3328100470957613, + "grad_norm": 1.2223048210144043, + "learning_rate": 1.9019814530850722e-05, + "loss": 0.323, + "step": 425 + }, + { + "epoch": 1.34850863422292, + "grad_norm": 1.1949001550674438, + "learning_rate": 1.8769500664708665e-05, + "loss": 0.333, + "step": 430 + }, + { + "epoch": 1.3642072213500784, + "grad_norm": 1.4552446603775024, + "learning_rate": 1.851806119618178e-05, + "loss": 0.297, + "step": 435 + }, + { + "epoch": 1.379905808477237, + "grad_norm": 1.2756812572479248, + "learning_rate": 1.826557120706221e-05, + "loss": 0.2891, + "step": 440 + }, + { + "epoch": 1.3956043956043955, + "grad_norm": 1.6384458541870117, + "learning_rate": 1.801210609283578e-05, + "loss": 0.3103, + "step": 445 + }, + { + "epoch": 1.4113029827315542, + "grad_norm": 1.297240972518921, + "learning_rate": 1.7757741540168276e-05, + "loss": 0.2997, + "step": 450 + }, + { + "epoch": 1.4270015698587128, + "grad_norm": 1.3759106397628784, + "learning_rate": 1.750255350430487e-05, + "loss": 0.2653, + "step": 455 + }, + { + "epoch": 1.4427001569858713, + "grad_norm": 1.2000504732131958, + "learning_rate": 1.7246618186389225e-05, + "loss": 0.26, + "step": 460 + }, + { + "epoch": 1.4583987441130297, + "grad_norm": 1.2562834024429321, + "learning_rate": 1.6990012010709293e-05, + "loss": 0.2663, + "step": 465 + }, + { + "epoch": 1.4740973312401884, + "grad_norm": 1.3584175109863281, + "learning_rate": 1.6732811601876395e-05, + "loss": 0.2628, + "step": 470 + }, + { + "epoch": 1.489795918367347, + "grad_norm": 1.3365910053253174, + "learning_rate": 1.6475093761944522e-05, + "loss": 0.2458, + "step": 475 + }, + { + "epoch": 1.5054945054945055, + "grad_norm": 1.2599745988845825, + "learning_rate": 1.6216935447476633e-05, + "loss": 0.2705, + "step": 480 + }, + { + "epoch": 1.521193092621664, + "grad_norm": 1.3098772764205933, + "learning_rate": 1.5958413746564833e-05, + "loss": 0.2708, + "step": 485 + }, + { + "epoch": 1.5368916797488226, + "grad_norm": 1.544609546661377, + "learning_rate": 1.5699605855811263e-05, + "loss": 0.2457, + "step": 490 + }, + { + "epoch": 1.5525902668759812, + "grad_norm": 1.3203516006469727, + "learning_rate": 1.5440589057276587e-05, + "loss": 0.2555, + "step": 495 + }, + { + "epoch": 1.5682888540031397, + "grad_norm": 1.3334897756576538, + "learning_rate": 1.5181440695402963e-05, + "loss": 0.2442, + "step": 500 + }, + { + "epoch": 1.5839874411302983, + "grad_norm": 1.2534574270248413, + "learning_rate": 1.4922238153918409e-05, + "loss": 0.2322, + "step": 505 + }, + { + "epoch": 1.599686028257457, + "grad_norm": 1.3100496530532837, + "learning_rate": 1.4663058832729368e-05, + "loss": 0.2239, + "step": 510 + }, + { + "epoch": 1.6153846153846154, + "grad_norm": 1.3238357305526733, + "learning_rate": 1.4403980124808546e-05, + "loss": 0.225, + "step": 515 + }, + { + "epoch": 1.6310832025117739, + "grad_norm": 1.3329050540924072, + "learning_rate": 1.414507939308473e-05, + "loss": 0.2303, + "step": 520 + }, + { + "epoch": 1.6467817896389325, + "grad_norm": 1.2496236562728882, + "learning_rate": 1.3886433947341626e-05, + "loss": 0.2289, + "step": 525 + }, + { + "epoch": 1.6624803767660912, + "grad_norm": 1.511303424835205, + "learning_rate": 1.3628121021132552e-05, + "loss": 0.2509, + "step": 530 + }, + { + "epoch": 1.6781789638932496, + "grad_norm": 1.2861193418502808, + "learning_rate": 1.3370217748717882e-05, + "loss": 0.2022, + "step": 535 + }, + { + "epoch": 1.693877551020408, + "grad_norm": 1.2037034034729004, + "learning_rate": 1.3112801142032169e-05, + "loss": 0.1862, + "step": 540 + }, + { + "epoch": 1.7095761381475667, + "grad_norm": 1.2905346155166626, + "learning_rate": 1.2855948067687776e-05, + "loss": 0.1893, + "step": 545 + }, + { + "epoch": 1.7252747252747254, + "grad_norm": 1.1683948040008545, + "learning_rate": 1.2599735224021893e-05, + "loss": 0.2021, + "step": 550 + }, + { + "epoch": 1.7409733124018838, + "grad_norm": 1.1141574382781982, + "learning_rate": 1.2344239118193825e-05, + "loss": 0.1702, + "step": 555 + }, + { + "epoch": 1.7566718995290422, + "grad_norm": 1.1173423528671265, + "learning_rate": 1.2089536043339352e-05, + "loss": 0.2007, + "step": 560 + }, + { + "epoch": 1.772370486656201, + "grad_norm": 1.132977843284607, + "learning_rate": 1.1835702055789005e-05, + "loss": 0.1697, + "step": 565 + }, + { + "epoch": 1.7880690737833596, + "grad_norm": 1.1695326566696167, + "learning_rate": 1.1582812952357039e-05, + "loss": 0.1993, + "step": 570 + }, + { + "epoch": 1.803767660910518, + "grad_norm": 1.2991739511489868, + "learning_rate": 1.1330944247707935e-05, + "loss": 0.1911, + "step": 575 + }, + { + "epoch": 1.8194662480376766, + "grad_norm": 1.30764639377594, + "learning_rate": 1.1080171151807095e-05, + "loss": 0.1643, + "step": 580 + }, + { + "epoch": 1.8351648351648353, + "grad_norm": 1.1035630702972412, + "learning_rate": 1.0830568547462595e-05, + "loss": 0.1634, + "step": 585 + }, + { + "epoch": 1.8508634222919937, + "grad_norm": 1.3061965703964233, + "learning_rate": 1.0582210967964569e-05, + "loss": 0.1996, + "step": 590 + }, + { + "epoch": 1.8665620094191522, + "grad_norm": 1.129275918006897, + "learning_rate": 1.0335172574828992e-05, + "loss": 0.1796, + "step": 595 + }, + { + "epoch": 1.8822605965463108, + "grad_norm": 1.1369765996932983, + "learning_rate": 1.0089527135652483e-05, + "loss": 0.1714, + "step": 600 + }, + { + "epoch": 1.8979591836734695, + "grad_norm": 1.1140309572219849, + "learning_rate": 9.8453480020847e-06, + "loss": 0.1721, + "step": 605 + }, + { + "epoch": 1.913657770800628, + "grad_norm": 1.1335960626602173, + "learning_rate": 9.602708087924985e-06, + "loss": 0.1662, + "step": 610 + }, + { + "epoch": 1.9293563579277864, + "grad_norm": 1.2526637315750122, + "learning_rate": 9.361679847349715e-06, + "loss": 0.1635, + "step": 615 + }, + { + "epoch": 1.945054945054945, + "grad_norm": 1.1506094932556152, + "learning_rate": 9.122335253276932e-06, + "loss": 0.1728, + "step": 620 + }, + { + "epoch": 1.9607535321821037, + "grad_norm": 1.2842040061950684, + "learning_rate": 8.88474577587467e-06, + "loss": 0.1909, + "step": 625 + }, + { + "epoch": 1.9764521193092621, + "grad_norm": 1.2882108688354492, + "learning_rate": 8.648982361219381e-06, + "loss": 0.1835, + "step": 630 + }, + { + "epoch": 1.9921507064364206, + "grad_norm": 1.1103945970535278, + "learning_rate": 8.415115410110908e-06, + "loss": 0.155, + "step": 635 + }, + { + "epoch": 2.0062794348508635, + "grad_norm": 1.2153977155685425, + "learning_rate": 8.183214757050242e-06, + "loss": 0.1544, + "step": 640 + }, + { + "epoch": 2.021978021978022, + "grad_norm": 0.9909257888793945, + "learning_rate": 7.95334964938639e-06, + "loss": 0.1284, + "step": 645 + }, + { + "epoch": 2.0376766091051803, + "grad_norm": 0.9187225103378296, + "learning_rate": 7.725588726638591e-06, + "loss": 0.1407, + "step": 650 + }, + { + "epoch": 2.053375196232339, + "grad_norm": 1.2221633195877075, + "learning_rate": 7.500000000000004e-06, + "loss": 0.1302, + "step": 655 + }, + { + "epoch": 2.0690737833594977, + "grad_norm": 1.0427472591400146, + "learning_rate": 7.276650832029032e-06, + "loss": 0.1337, + "step": 660 + }, + { + "epoch": 2.084772370486656, + "grad_norm": 1.0458767414093018, + "learning_rate": 7.0556079165343475e-06, + "loss": 0.1159, + "step": 665 + }, + { + "epoch": 2.100470957613815, + "grad_norm": 1.1448655128479004, + "learning_rate": 6.836937258659589e-06, + "loss": 0.1259, + "step": 670 + }, + { + "epoch": 2.1161695447409734, + "grad_norm": 0.9684000015258789, + "learning_rate": 6.620704155173724e-06, + "loss": 0.1418, + "step": 675 + }, + { + "epoch": 2.131868131868132, + "grad_norm": 1.3454493284225464, + "learning_rate": 6.406973174972901e-06, + "loss": 0.1296, + "step": 680 + }, + { + "epoch": 2.1475667189952903, + "grad_norm": 1.1455661058425903, + "learning_rate": 6.1958081397996855e-06, + "loss": 0.1058, + "step": 685 + }, + { + "epoch": 2.163265306122449, + "grad_norm": 1.1463186740875244, + "learning_rate": 5.987272105185388e-06, + "loss": 0.131, + "step": 690 + }, + { + "epoch": 2.1789638932496076, + "grad_norm": 0.8651219606399536, + "learning_rate": 5.781427341621183e-06, + "loss": 0.1109, + "step": 695 + }, + { + "epoch": 2.194662480376766, + "grad_norm": 1.3981276750564575, + "learning_rate": 5.5783353159636625e-06, + "loss": 0.1283, + "step": 700 + }, + { + "epoch": 2.2103610675039245, + "grad_norm": 1.041352391242981, + "learning_rate": 5.3780566730803445e-06, + "loss": 0.1121, + "step": 705 + }, + { + "epoch": 2.2260596546310834, + "grad_norm": 0.9146772027015686, + "learning_rate": 5.180651217740642e-06, + "loss": 0.1036, + "step": 710 + }, + { + "epoch": 2.241758241758242, + "grad_norm": 1.176609992980957, + "learning_rate": 4.986177896757696e-06, + "loss": 0.1003, + "step": 715 + }, + { + "epoch": 2.2574568288854002, + "grad_norm": 0.9320028424263, + "learning_rate": 4.794694781386393e-06, + "loss": 0.106, + "step": 720 + }, + { + "epoch": 2.2731554160125587, + "grad_norm": 0.9586503505706787, + "learning_rate": 4.606259049982822e-06, + "loss": 0.1137, + "step": 725 + }, + { + "epoch": 2.2888540031397175, + "grad_norm": 1.1320774555206299, + "learning_rate": 4.420926970930394e-06, + "loss": 0.0963, + "step": 730 + }, + { + "epoch": 2.304552590266876, + "grad_norm": 1.110286831855774, + "learning_rate": 4.23875388583765e-06, + "loss": 0.1269, + "step": 735 + }, + { + "epoch": 2.3202511773940344, + "grad_norm": 0.9210241436958313, + "learning_rate": 4.059794193012813e-06, + "loss": 0.1162, + "step": 740 + }, + { + "epoch": 2.3359497645211933, + "grad_norm": 0.9188780188560486, + "learning_rate": 3.884101331220049e-06, + "loss": 0.1045, + "step": 745 + }, + { + "epoch": 2.3516483516483517, + "grad_norm": 0.788155198097229, + "learning_rate": 3.7117277637222235e-06, + "loss": 0.0952, + "step": 750 + }, + { + "epoch": 2.36734693877551, + "grad_norm": 0.7043110728263855, + "learning_rate": 3.5427249626149545e-06, + "loss": 0.1083, + "step": 755 + }, + { + "epoch": 2.3830455259026686, + "grad_norm": 0.8441283702850342, + "learning_rate": 3.3771433934566532e-06, + "loss": 0.0939, + "step": 760 + }, + { + "epoch": 2.3987441130298275, + "grad_norm": 0.6979141235351562, + "learning_rate": 3.215032500199106e-06, + "loss": 0.0963, + "step": 765 + }, + { + "epoch": 2.414442700156986, + "grad_norm": 0.7478846311569214, + "learning_rate": 3.056440690423127e-06, + "loss": 0.0901, + "step": 770 + }, + { + "epoch": 2.4301412872841444, + "grad_norm": 0.8405168652534485, + "learning_rate": 2.9014153208836764e-06, + "loss": 0.1009, + "step": 775 + }, + { + "epoch": 2.445839874411303, + "grad_norm": 0.8138031363487244, + "learning_rate": 2.750002683368761e-06, + "loss": 0.0916, + "step": 780 + }, + { + "epoch": 2.4615384615384617, + "grad_norm": 0.8422724604606628, + "learning_rate": 2.602247990876346e-06, + "loss": 0.0891, + "step": 785 + }, + { + "epoch": 2.47723704866562, + "grad_norm": 1.0071989297866821, + "learning_rate": 2.4581953641133975e-06, + "loss": 0.0861, + "step": 790 + }, + { + "epoch": 2.4929356357927785, + "grad_norm": 0.8705869913101196, + "learning_rate": 2.3178878183210966e-06, + "loss": 0.0968, + "step": 795 + }, + { + "epoch": 2.5086342229199374, + "grad_norm": 0.7169772386550903, + "learning_rate": 2.181367250430153e-06, + "loss": 0.1015, + "step": 800 + }, + { + "epoch": 2.524332810047096, + "grad_norm": 0.8356963396072388, + "learning_rate": 2.0486744265500477e-06, + "loss": 0.1109, + "step": 805 + }, + { + "epoch": 2.5400313971742543, + "grad_norm": 0.8454916477203369, + "learning_rate": 1.9198489697959635e-06, + "loss": 0.091, + "step": 810 + }, + { + "epoch": 2.5557299843014127, + "grad_norm": 0.8273601531982422, + "learning_rate": 1.7949293484570045e-06, + "loss": 0.1094, + "step": 815 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 0.965569019317627, + "learning_rate": 1.6739528645092666e-06, + "loss": 0.0935, + "step": 820 + }, + { + "epoch": 2.58712715855573, + "grad_norm": 0.7581872940063477, + "learning_rate": 1.556955642477177e-06, + "loss": 0.0922, + "step": 825 + }, + { + "epoch": 2.6028257456828885, + "grad_norm": 0.6589751243591309, + "learning_rate": 1.4439726186464264e-06, + "loss": 0.0872, + "step": 830 + }, + { + "epoch": 2.618524332810047, + "grad_norm": 0.8003440499305725, + "learning_rate": 1.335037530631727e-06, + "loss": 0.0993, + "step": 835 + }, + { + "epoch": 2.634222919937206, + "grad_norm": 0.7599704265594482, + "learning_rate": 1.2301829073024833e-06, + "loss": 0.1016, + "step": 840 + }, + { + "epoch": 2.6499215070643642, + "grad_norm": 0.674392580986023, + "learning_rate": 1.1294400590694348e-06, + "loss": 0.0797, + "step": 845 + }, + { + "epoch": 2.6656200941915227, + "grad_norm": 0.8874809145927429, + "learning_rate": 1.0328390685351085e-06, + "loss": 0.0696, + "step": 850 + }, + { + "epoch": 2.6813186813186816, + "grad_norm": 0.9264867901802063, + "learning_rate": 9.404087815109241e-07, + "loss": 0.0795, + "step": 855 + }, + { + "epoch": 2.69701726844584, + "grad_norm": 0.6587774753570557, + "learning_rate": 8.521767984036083e-07, + "loss": 0.0977, + "step": 860 + }, + { + "epoch": 2.7127158555729984, + "grad_norm": 0.7944997549057007, + "learning_rate": 7.681694659734995e-07, + "loss": 0.09, + "step": 865 + }, + { + "epoch": 2.728414442700157, + "grad_norm": 0.802121639251709, + "learning_rate": 6.88411869467196e-07, + "loss": 0.0806, + "step": 870 + }, + { + "epoch": 2.7441130298273153, + "grad_norm": 0.6392430663108826, + "learning_rate": 6.12927825126906e-07, + "loss": 0.1037, + "step": 875 + }, + { + "epoch": 2.759811616954474, + "grad_norm": 0.8168322443962097, + "learning_rate": 5.417398730787321e-07, + "loss": 0.0929, + "step": 880 + }, + { + "epoch": 2.7755102040816326, + "grad_norm": 0.7503212690353394, + "learning_rate": 4.748692706020125e-07, + "loss": 0.0849, + "step": 885 + }, + { + "epoch": 2.791208791208791, + "grad_norm": 0.7418519258499146, + "learning_rate": 4.1233598578173104e-07, + "loss": 0.0823, + "step": 890 + }, + { + "epoch": 2.80690737833595, + "grad_norm": 0.8550965189933777, + "learning_rate": 3.5415869154589495e-07, + "loss": 0.0889, + "step": 895 + }, + { + "epoch": 2.8226059654631084, + "grad_norm": 0.7465653419494629, + "learning_rate": 3.0035476008964337e-07, + "loss": 0.0968, + "step": 900 + }, + { + "epoch": 2.838304552590267, + "grad_norm": 0.6125336289405823, + "learning_rate": 2.5094025768778506e-07, + "loss": 0.085, + "step": 905 + }, + { + "epoch": 2.8540031397174257, + "grad_norm": 0.820423424243927, + "learning_rate": 2.0592993989727305e-07, + "loss": 0.1049, + "step": 910 + }, + { + "epoch": 2.869701726844584, + "grad_norm": 0.7673197984695435, + "learning_rate": 1.6533724715108555e-07, + "loss": 0.1078, + "step": 915 + }, + { + "epoch": 2.8854003139717426, + "grad_norm": 0.6353623867034912, + "learning_rate": 1.2917430074480819e-07, + "loss": 0.086, + "step": 920 + }, + { + "epoch": 2.901098901098901, + "grad_norm": 0.8423333168029785, + "learning_rate": 9.745189921711583e-08, + "loss": 0.1009, + "step": 925 + }, + { + "epoch": 2.9167974882260594, + "grad_norm": 0.8508203029632568, + "learning_rate": 7.017951512524879e-08, + "loss": 0.0863, + "step": 930 + }, + { + "epoch": 2.9324960753532183, + "grad_norm": 0.6977843046188354, + "learning_rate": 4.7365292216431934e-08, + "loss": 0.0819, + "step": 935 + }, + { + "epoch": 2.9481946624803768, + "grad_norm": 0.6996131539344788, + "learning_rate": 2.9016042996085067e-08, + "loss": 0.1032, + "step": 940 + }, + { + "epoch": 2.963893249607535, + "grad_norm": 0.6902872920036316, + "learning_rate": 1.513724669356009e-08, + "loss": 0.0942, + "step": 945 + }, + { + "epoch": 2.979591836734694, + "grad_norm": 0.7892916798591614, + "learning_rate": 5.733047625998134e-09, + "loss": 0.0966, + "step": 950 + }, + { + "epoch": 2.9952904238618525, + "grad_norm": 0.7816653251647949, + "learning_rate": 8.062539608028052e-10, + "loss": 0.0901, + "step": 955 + }, + { + "epoch": 3.0, + "step": 957, + "total_flos": 1.1998547259110195e+18, + "train_loss": 0.438542250181814, + "train_runtime": 545.6254, + "train_samples_per_second": 56.022, + "train_steps_per_second": 1.754 + } + ], + "logging_steps": 5, + "max_steps": 957, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1998547259110195e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..64d24ecab5c644f77b8611dd95cccb051f9c394e --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/11_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a83ad407a8dbf446b2ebe439965f54554ee5850f834f9746b2c564bd679c1186 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..55f8bacf9bb8cca3a874bc8d3fc64de98ec4ed5d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 12_128_e3_3e-5 + results: [] +--- + + + +# 12_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..665abaa8776ce18f69bbf8eeebfe35652713f46b --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "up_proj", + "k_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89dcbd7b611bddbb2bf15213d259074de9b3abcc --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1872254f76987806aa0205bfd2cc067e664ff4c18331f262a169ee75f930092 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..562ccbc2949c4a88821ed1094c979dbb7078a4ea --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.258493972385366e+18, + "train_loss": 0.4077561020677361, + "train_runtime": 555.4384, + "train_samples": 10970, + "train_samples_per_second": 59.25, + "train_steps_per_second": 1.853 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..562ccbc2949c4a88821ed1094c979dbb7078a4ea --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.258493972385366e+18, + "train_loss": 0.4077561020677361, + "train_runtime": 555.4384, + "train_samples": 10970, + "train_samples_per_second": 59.25, + "train_steps_per_second": 1.853 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..97ecffab1f7218d4eda29dc61651984f9a9ba7c4 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1478 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1029, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014577259475218658, + "grad_norm": 0.6806447505950928, + "learning_rate": 2.307692307692308e-06, + "loss": 1.5493, + "step": 5 + }, + { + "epoch": 0.029154518950437316, + "grad_norm": 0.661142885684967, + "learning_rate": 5.192307692307692e-06, + "loss": 1.6034, + "step": 10 + }, + { + "epoch": 0.043731778425655975, + "grad_norm": 0.5689885020256042, + "learning_rate": 8.076923076923077e-06, + "loss": 1.6118, + "step": 15 + }, + { + "epoch": 0.05830903790087463, + "grad_norm": 0.4580037593841553, + "learning_rate": 1.0961538461538462e-05, + "loss": 1.5896, + "step": 20 + }, + { + "epoch": 0.0728862973760933, + "grad_norm": 0.5390421748161316, + "learning_rate": 1.3846153846153847e-05, + "loss": 1.5211, + "step": 25 + }, + { + "epoch": 0.08746355685131195, + "grad_norm": 0.5386611223220825, + "learning_rate": 1.673076923076923e-05, + "loss": 1.5119, + "step": 30 + }, + { + "epoch": 0.10204081632653061, + "grad_norm": 0.4481329917907715, + "learning_rate": 1.9615384615384617e-05, + "loss": 1.4585, + "step": 35 + }, + { + "epoch": 0.11661807580174927, + "grad_norm": 0.4693893790245056, + "learning_rate": 2.25e-05, + "loss": 1.4636, + "step": 40 + }, + { + "epoch": 0.13119533527696792, + "grad_norm": 0.47576284408569336, + "learning_rate": 2.5384615384615386e-05, + "loss": 1.4396, + "step": 45 + }, + { + "epoch": 0.1457725947521866, + "grad_norm": 0.5577271580696106, + "learning_rate": 2.8269230769230768e-05, + "loss": 1.4521, + "step": 50 + }, + { + "epoch": 0.16034985422740525, + "grad_norm": 0.5224016904830933, + "learning_rate": 2.999968980815508e-05, + "loss": 1.3608, + "step": 55 + }, + { + "epoch": 0.1749271137026239, + "grad_norm": 0.5276957750320435, + "learning_rate": 2.999620029723336e-05, + "loss": 1.3416, + "step": 60 + }, + { + "epoch": 0.18950437317784258, + "grad_norm": 0.5785362124443054, + "learning_rate": 2.998883444058916e-05, + "loss": 1.2402, + "step": 65 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.6616777181625366, + "learning_rate": 2.9977594142211905e-05, + "loss": 1.34, + "step": 70 + }, + { + "epoch": 0.21865889212827988, + "grad_norm": 0.6435102224349976, + "learning_rate": 2.996248230758977e-05, + "loss": 1.3057, + "step": 75 + }, + { + "epoch": 0.23323615160349853, + "grad_norm": 0.587324857711792, + "learning_rate": 2.9943502842958625e-05, + "loss": 1.1948, + "step": 80 + }, + { + "epoch": 0.2478134110787172, + "grad_norm": 0.5977291464805603, + "learning_rate": 2.992066065429232e-05, + "loss": 1.2038, + "step": 85 + }, + { + "epoch": 0.26239067055393583, + "grad_norm": 0.6467478275299072, + "learning_rate": 2.989396164603455e-05, + "loss": 1.184, + "step": 90 + }, + { + "epoch": 0.27696793002915454, + "grad_norm": 0.6503518223762512, + "learning_rate": 2.986341271957263e-05, + "loss": 1.1425, + "step": 95 + }, + { + "epoch": 0.2915451895043732, + "grad_norm": 0.7236235737800598, + "learning_rate": 2.9829021771453552e-05, + "loss": 1.1665, + "step": 100 + }, + { + "epoch": 0.30612244897959184, + "grad_norm": 0.8651178479194641, + "learning_rate": 2.9790797691342838e-05, + "loss": 1.0794, + "step": 105 + }, + { + "epoch": 0.3206997084548105, + "grad_norm": 0.8108046054840088, + "learning_rate": 2.9748750359726635e-05, + "loss": 1.0588, + "step": 110 + }, + { + "epoch": 0.33527696793002915, + "grad_norm": 0.7440317869186401, + "learning_rate": 2.9702890645357753e-05, + "loss": 1.0423, + "step": 115 + }, + { + "epoch": 0.3498542274052478, + "grad_norm": 0.7216150164604187, + "learning_rate": 2.9653230402446202e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.36443148688046645, + "grad_norm": 0.7735509872436523, + "learning_rate": 2.959978246759501e-05, + "loss": 0.983, + "step": 125 + }, + { + "epoch": 0.37900874635568516, + "grad_norm": 0.9040346741676331, + "learning_rate": 2.9542560656482116e-05, + "loss": 0.9551, + "step": 130 + }, + { + "epoch": 0.3935860058309038, + "grad_norm": 0.8306506276130676, + "learning_rate": 2.9481579760289176e-05, + "loss": 0.9827, + "step": 135 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0011217594146729, + "learning_rate": 2.9416855541878215e-05, + "loss": 0.9047, + "step": 140 + }, + { + "epoch": 0.4227405247813411, + "grad_norm": 0.9357397556304932, + "learning_rate": 2.9348404731717092e-05, + "loss": 0.8923, + "step": 145 + }, + { + "epoch": 0.43731778425655976, + "grad_norm": 1.0381397008895874, + "learning_rate": 2.9276245023554888e-05, + "loss": 0.9095, + "step": 150 + }, + { + "epoch": 0.4518950437317784, + "grad_norm": 0.9463521242141724, + "learning_rate": 2.9200395069848253e-05, + "loss": 0.8743, + "step": 155 + }, + { + "epoch": 0.46647230320699706, + "grad_norm": 1.0434989929199219, + "learning_rate": 2.912087447693999e-05, + "loss": 0.8185, + "step": 160 + }, + { + "epoch": 0.48104956268221577, + "grad_norm": 0.9781724810600281, + "learning_rate": 2.9037703799991007e-05, + "loss": 0.782, + "step": 165 + }, + { + "epoch": 0.4956268221574344, + "grad_norm": 1.0015380382537842, + "learning_rate": 2.8950904537667057e-05, + "loss": 0.8519, + "step": 170 + }, + { + "epoch": 0.5102040816326531, + "grad_norm": 0.9412116408348083, + "learning_rate": 2.8860499126581593e-05, + "loss": 0.7842, + "step": 175 + }, + { + "epoch": 0.5247813411078717, + "grad_norm": 0.9596832990646362, + "learning_rate": 2.876651093549613e-05, + "loss": 0.78, + "step": 180 + }, + { + "epoch": 0.5393586005830904, + "grad_norm": 1.004402756690979, + "learning_rate": 2.866896425927973e-05, + "loss": 0.6679, + "step": 185 + }, + { + "epoch": 0.5539358600583091, + "grad_norm": 1.1706701517105103, + "learning_rate": 2.8567884312629023e-05, + "loss": 0.7814, + "step": 190 + }, + { + "epoch": 0.5685131195335277, + "grad_norm": 0.9860739707946777, + "learning_rate": 2.8463297223550495e-05, + "loss": 0.7151, + "step": 195 + }, + { + "epoch": 0.5830903790087464, + "grad_norm": 1.0833741426467896, + "learning_rate": 2.8355230026606743e-05, + "loss": 0.7048, + "step": 200 + }, + { + "epoch": 0.597667638483965, + "grad_norm": 1.028178095817566, + "learning_rate": 2.8243710655928287e-05, + "loss": 0.6942, + "step": 205 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 1.0778172016143799, + "learning_rate": 2.8128767937992963e-05, + "loss": 0.6528, + "step": 210 + }, + { + "epoch": 0.6268221574344023, + "grad_norm": 1.149814248085022, + "learning_rate": 2.8010431584174605e-05, + "loss": 0.6982, + "step": 215 + }, + { + "epoch": 0.641399416909621, + "grad_norm": 1.1449440717697144, + "learning_rate": 2.788873218306298e-05, + "loss": 0.5924, + "step": 220 + }, + { + "epoch": 0.6559766763848397, + "grad_norm": 1.1645174026489258, + "learning_rate": 2.7763701192556985e-05, + "loss": 0.632, + "step": 225 + }, + { + "epoch": 0.6705539358600583, + "grad_norm": 1.1523641347885132, + "learning_rate": 2.7635370931733167e-05, + "loss": 0.6211, + "step": 230 + }, + { + "epoch": 0.685131195335277, + "grad_norm": 1.0156079530715942, + "learning_rate": 2.750377457249159e-05, + "loss": 0.6051, + "step": 235 + }, + { + "epoch": 0.6997084548104956, + "grad_norm": 1.1002193689346313, + "learning_rate": 2.7368946130981286e-05, + "loss": 0.6002, + "step": 240 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 1.1213942766189575, + "learning_rate": 2.7230920458807473e-05, + "loss": 0.6294, + "step": 245 + }, + { + "epoch": 0.7288629737609329, + "grad_norm": 1.2468255758285522, + "learning_rate": 2.708973323402281e-05, + "loss": 0.5846, + "step": 250 + }, + { + "epoch": 0.7434402332361516, + "grad_norm": 1.0495729446411133, + "learning_rate": 2.6945420951905038e-05, + "loss": 0.6228, + "step": 255 + }, + { + "epoch": 0.7580174927113703, + "grad_norm": 1.1178947687149048, + "learning_rate": 2.6798020915523362e-05, + "loss": 0.5381, + "step": 260 + }, + { + "epoch": 0.7725947521865889, + "grad_norm": 1.3102003335952759, + "learning_rate": 2.6647571226096027e-05, + "loss": 0.573, + "step": 265 + }, + { + "epoch": 0.7871720116618076, + "grad_norm": 1.1478500366210938, + "learning_rate": 2.6494110773141627e-05, + "loss": 0.5653, + "step": 270 + }, + { + "epoch": 0.8017492711370262, + "grad_norm": 1.1622207164764404, + "learning_rate": 2.633767922442656e-05, + "loss": 0.6267, + "step": 275 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 1.20098078250885, + "learning_rate": 2.6178317015711397e-05, + "loss": 0.5607, + "step": 280 + }, + { + "epoch": 0.8309037900874635, + "grad_norm": 1.1245421171188354, + "learning_rate": 2.6016065340298706e-05, + "loss": 0.5631, + "step": 285 + }, + { + "epoch": 0.8454810495626822, + "grad_norm": 1.5809661149978638, + "learning_rate": 2.585096613838502e-05, + "loss": 0.5625, + "step": 290 + }, + { + "epoch": 0.8600583090379009, + "grad_norm": 1.435782790184021, + "learning_rate": 2.5683062086219837e-05, + "loss": 0.5327, + "step": 295 + }, + { + "epoch": 0.8746355685131195, + "grad_norm": 1.273697018623352, + "learning_rate": 2.5512396585074234e-05, + "loss": 0.5445, + "step": 300 + }, + { + "epoch": 0.8892128279883382, + "grad_norm": 1.1391314268112183, + "learning_rate": 2.533901375002218e-05, + "loss": 0.5212, + "step": 305 + }, + { + "epoch": 0.9037900874635568, + "grad_norm": 1.1281670331954956, + "learning_rate": 2.516295839853728e-05, + "loss": 0.5138, + "step": 310 + }, + { + "epoch": 0.9183673469387755, + "grad_norm": 1.2149471044540405, + "learning_rate": 2.498427603890798e-05, + "loss": 0.5286, + "step": 315 + }, + { + "epoch": 0.9329446064139941, + "grad_norm": 1.1315255165100098, + "learning_rate": 2.480301285847418e-05, + "loss": 0.4537, + "step": 320 + }, + { + "epoch": 0.9475218658892128, + "grad_norm": 1.3806989192962646, + "learning_rate": 2.461921571168836e-05, + "loss": 0.4404, + "step": 325 + }, + { + "epoch": 0.9620991253644315, + "grad_norm": 1.2474677562713623, + "learning_rate": 2.4432932108004214e-05, + "loss": 0.431, + "step": 330 + }, + { + "epoch": 0.9766763848396501, + "grad_norm": 1.2107288837432861, + "learning_rate": 2.4244210199596038e-05, + "loss": 0.47, + "step": 335 + }, + { + "epoch": 0.9912536443148688, + "grad_norm": 1.4301996231079102, + "learning_rate": 2.4053098768911905e-05, + "loss": 0.4007, + "step": 340 + }, + { + "epoch": 1.0058309037900874, + "grad_norm": 1.127027153968811, + "learning_rate": 2.385964721606399e-05, + "loss": 0.4329, + "step": 345 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 1.360874056816101, + "learning_rate": 2.3663905546059192e-05, + "loss": 0.4182, + "step": 350 + }, + { + "epoch": 1.0349854227405249, + "grad_norm": 2.0590879917144775, + "learning_rate": 2.3465924355873433e-05, + "loss": 0.3567, + "step": 355 + }, + { + "epoch": 1.0495626822157433, + "grad_norm": 1.2471686601638794, + "learning_rate": 2.326575482137289e-05, + "loss": 0.3833, + "step": 360 + }, + { + "epoch": 1.064139941690962, + "grad_norm": 1.1468981504440308, + "learning_rate": 2.3063448684085653e-05, + "loss": 0.3535, + "step": 365 + }, + { + "epoch": 1.0787172011661808, + "grad_norm": 1.1463680267333984, + "learning_rate": 2.2859058237827114e-05, + "loss": 0.3166, + "step": 370 + }, + { + "epoch": 1.0932944606413995, + "grad_norm": 1.2454944849014282, + "learning_rate": 2.265263631518261e-05, + "loss": 0.3506, + "step": 375 + }, + { + "epoch": 1.1078717201166182, + "grad_norm": 1.3829729557037354, + "learning_rate": 2.2444236273850837e-05, + "loss": 0.3463, + "step": 380 + }, + { + "epoch": 1.1224489795918366, + "grad_norm": 1.212926983833313, + "learning_rate": 2.2233911982851452e-05, + "loss": 0.3367, + "step": 385 + }, + { + "epoch": 1.1370262390670554, + "grad_norm": 1.3720661401748657, + "learning_rate": 2.2021717808600602e-05, + "loss": 0.3805, + "step": 390 + }, + { + "epoch": 1.151603498542274, + "grad_norm": 1.1316945552825928, + "learning_rate": 2.1807708600857773e-05, + "loss": 0.3479, + "step": 395 + }, + { + "epoch": 1.1661807580174928, + "grad_norm": 1.4008411169052124, + "learning_rate": 2.15919396785478e-05, + "loss": 0.3027, + "step": 400 + }, + { + "epoch": 1.1807580174927113, + "grad_norm": 1.3179919719696045, + "learning_rate": 2.1374466815461536e-05, + "loss": 0.3854, + "step": 405 + }, + { + "epoch": 1.19533527696793, + "grad_norm": 1.1257389783859253, + "learning_rate": 2.1155346225838994e-05, + "loss": 0.2823, + "step": 410 + }, + { + "epoch": 1.2099125364431487, + "grad_norm": 1.2019569873809814, + "learning_rate": 2.0934634549838567e-05, + "loss": 0.284, + "step": 415 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 1.4042391777038574, + "learning_rate": 2.0712388838896236e-05, + "loss": 0.3082, + "step": 420 + }, + { + "epoch": 1.239067055393586, + "grad_norm": 1.1345500946044922, + "learning_rate": 2.0488666540978405e-05, + "loss": 0.3148, + "step": 425 + }, + { + "epoch": 1.2536443148688048, + "grad_norm": 1.4181177616119385, + "learning_rate": 2.0263525485732285e-05, + "loss": 0.3065, + "step": 430 + }, + { + "epoch": 1.2682215743440233, + "grad_norm": 1.2415271997451782, + "learning_rate": 2.0037023869537567e-05, + "loss": 0.2921, + "step": 435 + }, + { + "epoch": 1.282798833819242, + "grad_norm": 1.2775797843933105, + "learning_rate": 1.980922024046333e-05, + "loss": 0.2675, + "step": 440 + }, + { + "epoch": 1.2973760932944607, + "grad_norm": 1.1403851509094238, + "learning_rate": 1.9580173483134066e-05, + "loss": 0.2829, + "step": 445 + }, + { + "epoch": 1.3119533527696792, + "grad_norm": 1.424919605255127, + "learning_rate": 1.934994280350864e-05, + "loss": 0.2714, + "step": 450 + }, + { + "epoch": 1.3265306122448979, + "grad_norm": 1.1068094968795776, + "learning_rate": 1.9118587713576264e-05, + "loss": 0.2976, + "step": 455 + }, + { + "epoch": 1.3411078717201166, + "grad_norm": 1.140210747718811, + "learning_rate": 1.8886168015973288e-05, + "loss": 0.2729, + "step": 460 + }, + { + "epoch": 1.3556851311953353, + "grad_norm": 1.3625082969665527, + "learning_rate": 1.8652743788524952e-05, + "loss": 0.3014, + "step": 465 + }, + { + "epoch": 1.370262390670554, + "grad_norm": 1.181063175201416, + "learning_rate": 1.8418375368715927e-05, + "loss": 0.3074, + "step": 470 + }, + { + "epoch": 1.3848396501457727, + "grad_norm": 1.1019797325134277, + "learning_rate": 1.818312333809379e-05, + "loss": 0.2686, + "step": 475 + }, + { + "epoch": 1.3994169096209912, + "grad_norm": 1.2318238019943237, + "learning_rate": 1.7947048506609346e-05, + "loss": 0.2512, + "step": 480 + }, + { + "epoch": 1.41399416909621, + "grad_norm": 1.2212443351745605, + "learning_rate": 1.7710211896898013e-05, + "loss": 0.2633, + "step": 485 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 1.1095503568649292, + "learning_rate": 1.7472674728506124e-05, + "loss": 0.2427, + "step": 490 + }, + { + "epoch": 1.4431486880466473, + "grad_norm": 1.302907109260559, + "learning_rate": 1.72344984020664e-05, + "loss": 0.2408, + "step": 495 + }, + { + "epoch": 1.4577259475218658, + "grad_norm": 1.2924895286560059, + "learning_rate": 1.6995744483426586e-05, + "loss": 0.2328, + "step": 500 + }, + { + "epoch": 1.4723032069970845, + "grad_norm": 1.147236943244934, + "learning_rate": 1.6756474687735368e-05, + "loss": 0.2518, + "step": 505 + }, + { + "epoch": 1.4868804664723032, + "grad_norm": 1.534887433052063, + "learning_rate": 1.6516750863489755e-05, + "loss": 0.2088, + "step": 510 + }, + { + "epoch": 1.501457725947522, + "grad_norm": 1.2389196157455444, + "learning_rate": 1.627663497654791e-05, + "loss": 0.2205, + "step": 515 + }, + { + "epoch": 1.5160349854227406, + "grad_norm": 1.1809660196304321, + "learning_rate": 1.6036189094111707e-05, + "loss": 0.2226, + "step": 520 + }, + { + "epoch": 1.5306122448979593, + "grad_norm": 1.3470337390899658, + "learning_rate": 1.579547536868308e-05, + "loss": 0.2342, + "step": 525 + }, + { + "epoch": 1.5451895043731778, + "grad_norm": 1.1741876602172852, + "learning_rate": 1.5554556021998294e-05, + "loss": 0.2481, + "step": 530 + }, + { + "epoch": 1.5597667638483965, + "grad_norm": 1.2932168245315552, + "learning_rate": 1.531349332894435e-05, + "loss": 0.2055, + "step": 535 + }, + { + "epoch": 1.574344023323615, + "grad_norm": 1.2096325159072876, + "learning_rate": 1.5072349601461645e-05, + "loss": 0.2502, + "step": 540 + }, + { + "epoch": 1.5889212827988337, + "grad_norm": 1.0843802690505981, + "learning_rate": 1.4831187172437047e-05, + "loss": 0.2086, + "step": 545 + }, + { + "epoch": 1.6034985422740524, + "grad_norm": 1.2139958143234253, + "learning_rate": 1.4590068379591544e-05, + "loss": 0.1711, + "step": 550 + }, + { + "epoch": 1.6180758017492711, + "grad_norm": 2.1964111328125, + "learning_rate": 1.4349055549366696e-05, + "loss": 0.2245, + "step": 555 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 1.1589475870132446, + "learning_rate": 1.4108210980813923e-05, + "loss": 0.1998, + "step": 560 + }, + { + "epoch": 1.6472303206997085, + "grad_norm": 1.2828484773635864, + "learning_rate": 1.3867596929490935e-05, + "loss": 0.1705, + "step": 565 + }, + { + "epoch": 1.6618075801749272, + "grad_norm": 1.0008822679519653, + "learning_rate": 1.3627275591369394e-05, + "loss": 0.1812, + "step": 570 + }, + { + "epoch": 1.6763848396501457, + "grad_norm": 1.22234308719635, + "learning_rate": 1.3387309086757945e-05, + "loss": 0.1975, + "step": 575 + }, + { + "epoch": 1.6909620991253644, + "grad_norm": 1.2840313911437988, + "learning_rate": 1.3147759444244851e-05, + "loss": 0.1842, + "step": 580 + }, + { + "epoch": 1.7055393586005831, + "grad_norm": 1.1366820335388184, + "learning_rate": 1.2908688584664303e-05, + "loss": 0.1812, + "step": 585 + }, + { + "epoch": 1.7201166180758016, + "grad_norm": 1.2072747945785522, + "learning_rate": 1.2670158305090588e-05, + "loss": 0.1771, + "step": 590 + }, + { + "epoch": 1.7346938775510203, + "grad_norm": 1.249901533126831, + "learning_rate": 1.2432230262864278e-05, + "loss": 0.2016, + "step": 595 + }, + { + "epoch": 1.749271137026239, + "grad_norm": 1.1953479051589966, + "learning_rate": 1.2194965959654487e-05, + "loss": 0.1944, + "step": 600 + }, + { + "epoch": 1.7638483965014577, + "grad_norm": 1.0514099597930908, + "learning_rate": 1.1958426725561423e-05, + "loss": 0.1669, + "step": 605 + }, + { + "epoch": 1.7784256559766765, + "grad_norm": 1.1292775869369507, + "learning_rate": 1.1722673703263228e-05, + "loss": 0.1789, + "step": 610 + }, + { + "epoch": 1.7930029154518952, + "grad_norm": 1.1320688724517822, + "learning_rate": 1.1487767832211336e-05, + "loss": 0.1786, + "step": 615 + }, + { + "epoch": 1.8075801749271136, + "grad_norm": 1.0635093450546265, + "learning_rate": 1.125376983287829e-05, + "loss": 0.174, + "step": 620 + }, + { + "epoch": 1.8221574344023324, + "grad_norm": 1.0094761848449707, + "learning_rate": 1.1020740191062205e-05, + "loss": 0.1379, + "step": 625 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 1.3200459480285645, + "learning_rate": 1.0788739142251905e-05, + "loss": 0.156, + "step": 630 + }, + { + "epoch": 1.8513119533527695, + "grad_norm": 1.085430383682251, + "learning_rate": 1.0557826656056703e-05, + "loss": 0.1337, + "step": 635 + }, + { + "epoch": 1.8658892128279883, + "grad_norm": 1.1763797998428345, + "learning_rate": 1.032806242070499e-05, + "loss": 0.1809, + "step": 640 + }, + { + "epoch": 1.880466472303207, + "grad_norm": 1.1997064352035522, + "learning_rate": 1.0099505827615461e-05, + "loss": 0.1636, + "step": 645 + }, + { + "epoch": 1.8950437317784257, + "grad_norm": 1.0357952117919922, + "learning_rate": 9.872215956045165e-06, + "loss": 0.1661, + "step": 650 + }, + { + "epoch": 1.9096209912536444, + "grad_norm": 1.2929925918579102, + "learning_rate": 9.646251557818154e-06, + "loss": 0.1429, + "step": 655 + }, + { + "epoch": 1.924198250728863, + "grad_norm": 0.9631356000900269, + "learning_rate": 9.42167104213885e-06, + "loss": 0.1337, + "step": 660 + }, + { + "epoch": 1.9387755102040818, + "grad_norm": 1.1914238929748535, + "learning_rate": 9.198532460493878e-06, + "loss": 0.1624, + "step": 665 + }, + { + "epoch": 1.9533527696793003, + "grad_norm": 1.124827265739441, + "learning_rate": 8.976893491646494e-06, + "loss": 0.1392, + "step": 670 + }, + { + "epoch": 1.967930029154519, + "grad_norm": 0.9549169540405273, + "learning_rate": 8.756811426727236e-06, + "loss": 0.1263, + "step": 675 + }, + { + "epoch": 1.9825072886297375, + "grad_norm": 1.002400279045105, + "learning_rate": 8.538343154424838e-06, + "loss": 0.1486, + "step": 680 + }, + { + "epoch": 1.9970845481049562, + "grad_norm": 1.0860426425933838, + "learning_rate": 8.321545146281162e-06, + "loss": 0.1346, + "step": 685 + }, + { + "epoch": 2.011661807580175, + "grad_norm": 0.9849966764450073, + "learning_rate": 8.106473442093946e-06, + "loss": 0.1224, + "step": 690 + }, + { + "epoch": 2.0262390670553936, + "grad_norm": 0.8416151404380798, + "learning_rate": 7.893183635431172e-06, + "loss": 0.1222, + "step": 695 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.9396371245384216, + "learning_rate": 7.681730859260739e-06, + "loss": 0.1009, + "step": 700 + }, + { + "epoch": 2.055393586005831, + "grad_norm": 0.7691406607627869, + "learning_rate": 7.472169771699238e-06, + "loss": 0.0949, + "step": 705 + }, + { + "epoch": 2.0699708454810497, + "grad_norm": 1.117634654045105, + "learning_rate": 7.264554541883481e-06, + "loss": 0.1012, + "step": 710 + }, + { + "epoch": 2.0845481049562684, + "grad_norm": 1.024188756942749, + "learning_rate": 7.058938835968361e-06, + "loss": 0.11, + "step": 715 + }, + { + "epoch": 2.0991253644314867, + "grad_norm": 0.9934547543525696, + "learning_rate": 6.8553758032547935e-06, + "loss": 0.1111, + "step": 720 + }, + { + "epoch": 2.1137026239067054, + "grad_norm": 1.1368316411972046, + "learning_rate": 6.653918062451206e-06, + "loss": 0.1031, + "step": 725 + }, + { + "epoch": 2.128279883381924, + "grad_norm": 1.0136314630508423, + "learning_rate": 6.454617688072235e-06, + "loss": 0.1125, + "step": 730 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 0.8032100796699524, + "learning_rate": 6.257526196978049e-06, + "loss": 0.0979, + "step": 735 + }, + { + "epoch": 2.1574344023323615, + "grad_norm": 1.0812184810638428, + "learning_rate": 6.062694535057829e-06, + "loss": 0.1032, + "step": 740 + }, + { + "epoch": 2.17201166180758, + "grad_norm": 1.0859647989273071, + "learning_rate": 5.8701730640608354e-06, + "loss": 0.1024, + "step": 745 + }, + { + "epoch": 2.186588921282799, + "grad_norm": 1.0313900709152222, + "learning_rate": 5.680011548578479e-06, + "loss": 0.1023, + "step": 750 + }, + { + "epoch": 2.2011661807580176, + "grad_norm": 0.9997396469116211, + "learning_rate": 5.492259143180757e-06, + "loss": 0.0943, + "step": 755 + }, + { + "epoch": 2.2157434402332363, + "grad_norm": 0.7561673521995544, + "learning_rate": 5.306964379710347e-06, + "loss": 0.0944, + "step": 760 + }, + { + "epoch": 2.2303206997084546, + "grad_norm": 0.895535945892334, + "learning_rate": 5.124175154737676e-06, + "loss": 0.0851, + "step": 765 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 1.1456518173217773, + "learning_rate": 4.9439387171802265e-06, + "loss": 0.1067, + "step": 770 + }, + { + "epoch": 2.259475218658892, + "grad_norm": 1.0287911891937256, + "learning_rate": 4.766301656089219e-06, + "loss": 0.1187, + "step": 775 + }, + { + "epoch": 2.2740524781341107, + "grad_norm": 0.8683261871337891, + "learning_rate": 4.591309888606886e-06, + "loss": 0.0801, + "step": 780 + }, + { + "epoch": 2.2886297376093294, + "grad_norm": 0.958232581615448, + "learning_rate": 4.419008648097402e-06, + "loss": 0.0989, + "step": 785 + }, + { + "epoch": 2.303206997084548, + "grad_norm": 0.906602144241333, + "learning_rate": 4.249442472454586e-06, + "loss": 0.1023, + "step": 790 + }, + { + "epoch": 2.317784256559767, + "grad_norm": 0.7730385661125183, + "learning_rate": 4.082655192589382e-06, + "loss": 0.0953, + "step": 795 + }, + { + "epoch": 2.3323615160349855, + "grad_norm": 0.7801249623298645, + "learning_rate": 3.918689921100051e-06, + "loss": 0.0786, + "step": 800 + }, + { + "epoch": 2.3469387755102042, + "grad_norm": 0.9047537446022034, + "learning_rate": 3.7575890411280733e-06, + "loss": 0.0902, + "step": 805 + }, + { + "epoch": 2.3615160349854225, + "grad_norm": 0.7723002433776855, + "learning_rate": 3.59939419540257e-06, + "loss": 0.1084, + "step": 810 + }, + { + "epoch": 2.376093294460641, + "grad_norm": 0.9021700620651245, + "learning_rate": 3.4441462754761655e-06, + "loss": 0.0975, + "step": 815 + }, + { + "epoch": 2.39067055393586, + "grad_norm": 1.0443731546401978, + "learning_rate": 3.291885411154973e-06, + "loss": 0.0857, + "step": 820 + }, + { + "epoch": 2.4052478134110786, + "grad_norm": 0.7361878752708435, + "learning_rate": 3.1426509601254955e-06, + "loss": 0.0949, + "step": 825 + }, + { + "epoch": 2.4198250728862973, + "grad_norm": 0.7225300669670105, + "learning_rate": 2.9964814977811327e-06, + "loss": 0.0785, + "step": 830 + }, + { + "epoch": 2.434402332361516, + "grad_norm": 1.0007984638214111, + "learning_rate": 2.8534148072508637e-06, + "loss": 0.0815, + "step": 835 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.7571635246276855, + "learning_rate": 2.713487869632762e-06, + "loss": 0.1015, + "step": 840 + }, + { + "epoch": 2.4635568513119535, + "grad_norm": 0.829801619052887, + "learning_rate": 2.576736854434793e-06, + "loss": 0.0757, + "step": 845 + }, + { + "epoch": 2.478134110787172, + "grad_norm": 0.6935174465179443, + "learning_rate": 2.4431971102254176e-06, + "loss": 0.0859, + "step": 850 + }, + { + "epoch": 2.4927113702623904, + "grad_norm": 0.617611825466156, + "learning_rate": 2.312903155496382e-06, + "loss": 0.0805, + "step": 855 + }, + { + "epoch": 2.5072886297376096, + "grad_norm": 0.995196521282196, + "learning_rate": 2.1858886697400916e-06, + "loss": 0.085, + "step": 860 + }, + { + "epoch": 2.521865889212828, + "grad_norm": 0.8552947640419006, + "learning_rate": 2.062186484743858e-06, + "loss": 0.0799, + "step": 865 + }, + { + "epoch": 2.5364431486880465, + "grad_norm": 0.6557287573814392, + "learning_rate": 1.9418285761032432e-06, + "loss": 0.072, + "step": 870 + }, + { + "epoch": 2.5510204081632653, + "grad_norm": 0.8156692981719971, + "learning_rate": 1.8248460549567375e-06, + "loss": 0.0971, + "step": 875 + }, + { + "epoch": 2.565597667638484, + "grad_norm": 0.6623476147651672, + "learning_rate": 1.7112691599438963e-06, + "loss": 0.0844, + "step": 880 + }, + { + "epoch": 2.5801749271137027, + "grad_norm": 0.7443599104881287, + "learning_rate": 1.6011272493889879e-06, + "loss": 0.0809, + "step": 885 + }, + { + "epoch": 2.5947521865889214, + "grad_norm": 0.8318702578544617, + "learning_rate": 1.494448793712217e-06, + "loss": 0.0764, + "step": 890 + }, + { + "epoch": 2.60932944606414, + "grad_norm": 0.9382652044296265, + "learning_rate": 1.3912613680704284e-06, + "loss": 0.0827, + "step": 895 + }, + { + "epoch": 2.6239067055393583, + "grad_norm": 0.8903673887252808, + "learning_rate": 1.2915916452292598e-06, + "loss": 0.0891, + "step": 900 + }, + { + "epoch": 2.6384839650145775, + "grad_norm": 0.8623375296592712, + "learning_rate": 1.1954653886685356e-06, + "loss": 0.088, + "step": 905 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.6665174961090088, + "learning_rate": 1.102907445922703e-06, + "loss": 0.0767, + "step": 910 + }, + { + "epoch": 2.6676384839650145, + "grad_norm": 0.8988716006278992, + "learning_rate": 1.0139417421580294e-06, + "loss": 0.0933, + "step": 915 + }, + { + "epoch": 2.682215743440233, + "grad_norm": 0.7019889950752258, + "learning_rate": 9.285912739882118e-07, + "loss": 0.0752, + "step": 920 + }, + { + "epoch": 2.696793002915452, + "grad_norm": 0.712678074836731, + "learning_rate": 8.468781035300305e-07, + "loss": 0.078, + "step": 925 + }, + { + "epoch": 2.7113702623906706, + "grad_norm": 0.7392523884773254, + "learning_rate": 7.688233527005373e-07, + "loss": 0.0868, + "step": 930 + }, + { + "epoch": 2.7259475218658893, + "grad_norm": 0.6690500378608704, + "learning_rate": 6.944471977572919e-07, + "loss": 0.0842, + "step": 935 + }, + { + "epoch": 2.740524781341108, + "grad_norm": 0.6522590517997742, + "learning_rate": 6.237688640830019e-07, + "loss": 0.0926, + "step": 940 + }, + { + "epoch": 2.7551020408163263, + "grad_norm": 0.6658541560173035, + "learning_rate": 5.568066212160227e-07, + "loss": 0.0853, + "step": 945 + }, + { + "epoch": 2.7696793002915454, + "grad_norm": 0.6125648617744446, + "learning_rate": 4.935777781278655e-07, + "loss": 0.0761, + "step": 950 + }, + { + "epoch": 2.7842565597667637, + "grad_norm": 0.6864567995071411, + "learning_rate": 4.340986787490292e-07, + "loss": 0.0763, + "step": 955 + }, + { + "epoch": 2.7988338192419824, + "grad_norm": 0.6097369194030762, + "learning_rate": 3.783846977442884e-07, + "loss": 0.0785, + "step": 960 + }, + { + "epoch": 2.813411078717201, + "grad_norm": 0.643641471862793, + "learning_rate": 3.264502365385158e-07, + "loss": 0.0883, + "step": 965 + }, + { + "epoch": 2.82798833819242, + "grad_norm": 0.8567319512367249, + "learning_rate": 2.783087195940781e-07, + "loss": 0.0879, + "step": 970 + }, + { + "epoch": 2.8425655976676385, + "grad_norm": 0.6506252884864807, + "learning_rate": 2.339725909407703e-07, + "loss": 0.0676, + "step": 975 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6658901572227478, + "learning_rate": 1.9345331095917184e-07, + "loss": 0.0738, + "step": 980 + }, + { + "epoch": 2.871720116618076, + "grad_norm": 0.7131209969520569, + "learning_rate": 1.5676135341826825e-07, + "loss": 0.0797, + "step": 985 + }, + { + "epoch": 2.8862973760932946, + "grad_norm": 0.5810184478759766, + "learning_rate": 1.2390620276811316e-07, + "loss": 0.0784, + "step": 990 + }, + { + "epoch": 2.9008746355685133, + "grad_norm": 0.7499890923500061, + "learning_rate": 9.489635168819621e-08, + "loss": 0.073, + "step": 995 + }, + { + "epoch": 2.9154518950437316, + "grad_norm": 0.7442775368690491, + "learning_rate": 6.973929889218578e-08, + "loss": 0.0904, + "step": 1000 + }, + { + "epoch": 2.9300291545189503, + "grad_norm": 0.8350651264190674, + "learning_rate": 4.844154718959604e-08, + "loss": 0.0918, + "step": 1005 + }, + { + "epoch": 2.944606413994169, + "grad_norm": 0.7166028618812561, + "learning_rate": 3.100860180489162e-08, + "loss": 0.0838, + "step": 1010 + }, + { + "epoch": 2.9591836734693877, + "grad_norm": 0.607093334197998, + "learning_rate": 1.7444968954440922e-08, + "loss": 0.0762, + "step": 1015 + }, + { + "epoch": 2.9737609329446064, + "grad_norm": 0.6391603350639343, + "learning_rate": 7.754154681717807e-09, + "loss": 0.0715, + "step": 1020 + }, + { + "epoch": 2.988338192419825, + "grad_norm": 0.8385747075080872, + "learning_rate": 1.938663951026598e-09, + "loss": 0.0903, + "step": 1025 + }, + { + "epoch": 3.0, + "step": 1029, + "total_flos": 1.258493972385366e+18, + "train_loss": 0.4077561020677361, + "train_runtime": 555.4384, + "train_samples_per_second": 59.25, + "train_steps_per_second": 1.853 + } + ], + "logging_steps": 5, + "max_steps": 1029, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.258493972385366e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c301c8dc23a47471c49a776d006fa47ce9017e16 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/12_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5376940c791b1fe40ce657b127cc1354696680e072e14b2f41b33413def93d1f +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e5aa7ce6aaf9bc982442e0674cdc23004aeaa04c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 13_128_e3_3e-5 + results: [] +--- + + + +# 13_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..41468b845617a5d81a62d2532b987d4ce15b65d6 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "v_proj", + "q_proj", + "o_proj", + "gate_proj", + "down_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e58d5b95b24e77f4edea84ac511d61a2dc700663 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b2dad0bb7bf336c3eeac4e3236cbcd0dfc379d3b3c2cb0a15a98dd11bce838 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2cfc2602dac4467fbb839152caa5033610b8cb5c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4247666007911956e+18, + "train_loss": 0.4142158398785258, + "train_runtime": 628.0444, + "train_samples": 11898, + "train_samples_per_second": 56.834, + "train_steps_per_second": 1.777 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2cfc2602dac4467fbb839152caa5033610b8cb5c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4247666007911956e+18, + "train_loss": 0.4142158398785258, + "train_runtime": 628.0444, + "train_samples": 11898, + "train_samples_per_second": 56.834, + "train_steps_per_second": 1.777 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b5df8bd23f24da3ef7efb501664c5064c88455ca --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1604 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1116, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013440860215053764, + "grad_norm": 0.6562577486038208, + "learning_rate": 2.1428571428571427e-06, + "loss": 1.5983, + "step": 5 + }, + { + "epoch": 0.026881720430107527, + "grad_norm": 0.5967065691947937, + "learning_rate": 4.821428571428572e-06, + "loss": 1.6431, + "step": 10 + }, + { + "epoch": 0.04032258064516129, + "grad_norm": 0.5379071831703186, + "learning_rate": 7.5e-06, + "loss": 1.5937, + "step": 15 + }, + { + "epoch": 0.053763440860215055, + "grad_norm": 0.5738704204559326, + "learning_rate": 1.0178571428571429e-05, + "loss": 1.5193, + "step": 20 + }, + { + "epoch": 0.06720430107526881, + "grad_norm": 0.5172061920166016, + "learning_rate": 1.2857142857142857e-05, + "loss": 1.6223, + "step": 25 + }, + { + "epoch": 0.08064516129032258, + "grad_norm": 0.5065715909004211, + "learning_rate": 1.553571428571429e-05, + "loss": 1.51, + "step": 30 + }, + { + "epoch": 0.09408602150537634, + "grad_norm": 0.5066525340080261, + "learning_rate": 1.8214285714285712e-05, + "loss": 1.4738, + "step": 35 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 0.45955690741539, + "learning_rate": 2.089285714285714e-05, + "loss": 1.5019, + "step": 40 + }, + { + "epoch": 0.12096774193548387, + "grad_norm": 0.49052467942237854, + "learning_rate": 2.357142857142857e-05, + "loss": 1.4117, + "step": 45 + }, + { + "epoch": 0.13440860215053763, + "grad_norm": 0.5020540952682495, + "learning_rate": 2.625e-05, + "loss": 1.4474, + "step": 50 + }, + { + "epoch": 0.1478494623655914, + "grad_norm": 0.5581772327423096, + "learning_rate": 2.892857142857143e-05, + "loss": 1.4316, + "step": 55 + }, + { + "epoch": 0.16129032258064516, + "grad_norm": 0.5363171100616455, + "learning_rate": 2.9999407089793328e-05, + "loss": 1.3771, + "step": 60 + }, + { + "epoch": 0.17473118279569894, + "grad_norm": 0.5731486082077026, + "learning_rate": 2.99957839193835e-05, + "loss": 1.3793, + "step": 65 + }, + { + "epoch": 0.1881720430107527, + "grad_norm": 0.6195076704025269, + "learning_rate": 2.9988867767784412e-05, + "loss": 1.294, + "step": 70 + }, + { + "epoch": 0.20161290322580644, + "grad_norm": 0.6200590133666992, + "learning_rate": 2.997866015374007e-05, + "loss": 1.3493, + "step": 75 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 0.5504101514816284, + "learning_rate": 2.996516331877925e-05, + "loss": 1.226, + "step": 80 + }, + { + "epoch": 0.22849462365591397, + "grad_norm": 0.6403772830963135, + "learning_rate": 2.994838022672324e-05, + "loss": 1.1889, + "step": 85 + }, + { + "epoch": 0.24193548387096775, + "grad_norm": 0.6019793748855591, + "learning_rate": 2.9928314563035015e-05, + "loss": 1.2157, + "step": 90 + }, + { + "epoch": 0.2553763440860215, + "grad_norm": 0.6491774320602417, + "learning_rate": 2.9904970734009933e-05, + "loss": 1.2002, + "step": 95 + }, + { + "epoch": 0.26881720430107525, + "grad_norm": 0.7385770678520203, + "learning_rate": 2.9878353865808144e-05, + "loss": 1.1678, + "step": 100 + }, + { + "epoch": 0.28225806451612906, + "grad_norm": 0.7286717295646667, + "learning_rate": 2.9848469803328896e-05, + "loss": 1.1376, + "step": 105 + }, + { + "epoch": 0.2956989247311828, + "grad_norm": 0.7336918711662292, + "learning_rate": 2.981532510892707e-05, + "loss": 1.1592, + "step": 110 + }, + { + "epoch": 0.30913978494623656, + "grad_norm": 0.7254199981689453, + "learning_rate": 2.9778927060972075e-05, + "loss": 1.0364, + "step": 115 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.8288744688034058, + "learning_rate": 2.9739283652249625e-05, + "loss": 1.0686, + "step": 120 + }, + { + "epoch": 0.33602150537634407, + "grad_norm": 0.8611423969268799, + "learning_rate": 2.9696403588206517e-05, + "loss": 1.0114, + "step": 125 + }, + { + "epoch": 0.34946236559139787, + "grad_norm": 0.8876594305038452, + "learning_rate": 2.9650296285038996e-05, + "loss": 1.014, + "step": 130 + }, + { + "epoch": 0.3629032258064516, + "grad_norm": 0.8985331058502197, + "learning_rate": 2.9600971867625027e-05, + "loss": 1.0074, + "step": 135 + }, + { + "epoch": 0.3763440860215054, + "grad_norm": 0.8707618713378906, + "learning_rate": 2.95484411673009e-05, + "loss": 0.9861, + "step": 140 + }, + { + "epoch": 0.3897849462365591, + "grad_norm": 0.9390861392021179, + "learning_rate": 2.9492715719482776e-05, + "loss": 0.9452, + "step": 145 + }, + { + "epoch": 0.4032258064516129, + "grad_norm": 0.887958824634552, + "learning_rate": 2.9433807761133542e-05, + "loss": 0.9571, + "step": 150 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.9454925060272217, + "learning_rate": 2.9371730228075684e-05, + "loss": 0.9212, + "step": 155 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 0.9069709777832031, + "learning_rate": 2.9306496752150638e-05, + "loss": 0.8955, + "step": 160 + }, + { + "epoch": 0.4435483870967742, + "grad_norm": 0.9906080365180969, + "learning_rate": 2.9238121658225337e-05, + "loss": 0.8979, + "step": 165 + }, + { + "epoch": 0.45698924731182794, + "grad_norm": 1.0171024799346924, + "learning_rate": 2.9166619961046545e-05, + "loss": 0.926, + "step": 170 + }, + { + "epoch": 0.47043010752688175, + "grad_norm": 0.9585063457489014, + "learning_rate": 2.909200736194372e-05, + "loss": 0.8276, + "step": 175 + }, + { + "epoch": 0.4838709677419355, + "grad_norm": 0.9530682563781738, + "learning_rate": 2.9014300245381095e-05, + "loss": 0.82, + "step": 180 + }, + { + "epoch": 0.49731182795698925, + "grad_norm": 1.1171059608459473, + "learning_rate": 2.8933515675359766e-05, + "loss": 0.8026, + "step": 185 + }, + { + "epoch": 0.510752688172043, + "grad_norm": 1.0209691524505615, + "learning_rate": 2.8849671391670518e-05, + "loss": 0.8119, + "step": 190 + }, + { + "epoch": 0.5241935483870968, + "grad_norm": 1.138441801071167, + "learning_rate": 2.8762785805998295e-05, + "loss": 0.7208, + "step": 195 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 1.118868112564087, + "learning_rate": 2.86728779978791e-05, + "loss": 0.8144, + "step": 200 + }, + { + "epoch": 0.5510752688172043, + "grad_norm": 0.9811844229698181, + "learning_rate": 2.8579967710510264e-05, + "loss": 0.7826, + "step": 205 + }, + { + "epoch": 0.5645161290322581, + "grad_norm": 1.209446668624878, + "learning_rate": 2.8484075346414936e-05, + "loss": 0.7248, + "step": 210 + }, + { + "epoch": 0.5779569892473119, + "grad_norm": 1.1912473440170288, + "learning_rate": 2.838522196296182e-05, + "loss": 0.7017, + "step": 215 + }, + { + "epoch": 0.5913978494623656, + "grad_norm": 1.156508445739746, + "learning_rate": 2.8283429267741138e-05, + "loss": 0.7282, + "step": 220 + }, + { + "epoch": 0.6048387096774194, + "grad_norm": 1.202690601348877, + "learning_rate": 2.8178719613797747e-05, + "loss": 0.6996, + "step": 225 + }, + { + "epoch": 0.6182795698924731, + "grad_norm": 1.1130589246749878, + "learning_rate": 2.807111599472254e-05, + "loss": 0.7169, + "step": 230 + }, + { + "epoch": 0.6317204301075269, + "grad_norm": 1.2702373266220093, + "learning_rate": 2.7960642039603235e-05, + "loss": 0.6497, + "step": 235 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 1.0923742055892944, + "learning_rate": 2.7847322007835546e-05, + "loss": 0.6972, + "step": 240 + }, + { + "epoch": 0.6586021505376344, + "grad_norm": 1.2124489545822144, + "learning_rate": 2.773118078379597e-05, + "loss": 0.6767, + "step": 245 + }, + { + "epoch": 0.6720430107526881, + "grad_norm": 1.0564804077148438, + "learning_rate": 2.7612243871377342e-05, + "loss": 0.6717, + "step": 250 + }, + { + "epoch": 0.6854838709677419, + "grad_norm": 1.0164963006973267, + "learning_rate": 2.749053738838834e-05, + "loss": 0.6754, + "step": 255 + }, + { + "epoch": 0.6989247311827957, + "grad_norm": 1.2455440759658813, + "learning_rate": 2.7366088060818154e-05, + "loss": 0.5978, + "step": 260 + }, + { + "epoch": 0.7123655913978495, + "grad_norm": 1.1370494365692139, + "learning_rate": 2.7238923216967666e-05, + "loss": 0.656, + "step": 265 + }, + { + "epoch": 0.7258064516129032, + "grad_norm": 1.2534204721450806, + "learning_rate": 2.7109070781448283e-05, + "loss": 0.6063, + "step": 270 + }, + { + "epoch": 0.739247311827957, + "grad_norm": 1.1029592752456665, + "learning_rate": 2.6976559269049875e-05, + "loss": 0.649, + "step": 275 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 1.0641226768493652, + "learning_rate": 2.6841417778479132e-05, + "loss": 0.5913, + "step": 280 + }, + { + "epoch": 0.7661290322580645, + "grad_norm": 1.3692972660064697, + "learning_rate": 2.670367598596963e-05, + "loss": 0.5815, + "step": 285 + }, + { + "epoch": 0.7795698924731183, + "grad_norm": 1.1979507207870483, + "learning_rate": 2.6563364138765137e-05, + "loss": 0.6388, + "step": 290 + }, + { + "epoch": 0.793010752688172, + "grad_norm": 1.0671974420547485, + "learning_rate": 2.6420513048477503e-05, + "loss": 0.5533, + "step": 295 + }, + { + "epoch": 0.8064516129032258, + "grad_norm": 1.2668418884277344, + "learning_rate": 2.6275154084320622e-05, + "loss": 0.561, + "step": 300 + }, + { + "epoch": 0.8198924731182796, + "grad_norm": 1.2684834003448486, + "learning_rate": 2.6127319166221923e-05, + "loss": 0.5552, + "step": 305 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 1.4477416276931763, + "learning_rate": 2.5977040757812997e-05, + "loss": 0.583, + "step": 310 + }, + { + "epoch": 0.8467741935483871, + "grad_norm": 1.2804896831512451, + "learning_rate": 2.5824351859300748e-05, + "loss": 0.525, + "step": 315 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.180612564086914, + "learning_rate": 2.5669286000220757e-05, + "loss": 0.5289, + "step": 320 + }, + { + "epoch": 0.8736559139784946, + "grad_norm": 1.3081634044647217, + "learning_rate": 2.551187723207443e-05, + "loss": 0.4994, + "step": 325 + }, + { + "epoch": 0.8870967741935484, + "grad_norm": 1.2035614252090454, + "learning_rate": 2.5352160120851464e-05, + "loss": 0.4643, + "step": 330 + }, + { + "epoch": 0.9005376344086021, + "grad_norm": 1.1888489723205566, + "learning_rate": 2.519016973943939e-05, + "loss": 0.5126, + "step": 335 + }, + { + "epoch": 0.9139784946236559, + "grad_norm": 1.282396674156189, + "learning_rate": 2.5025941659921783e-05, + "loss": 0.4891, + "step": 340 + }, + { + "epoch": 0.9274193548387096, + "grad_norm": 1.167871356010437, + "learning_rate": 2.485951194576685e-05, + "loss": 0.5015, + "step": 345 + }, + { + "epoch": 0.9408602150537635, + "grad_norm": 1.5241525173187256, + "learning_rate": 2.469091714390811e-05, + "loss": 0.4599, + "step": 350 + }, + { + "epoch": 0.9543010752688172, + "grad_norm": 1.3221710920333862, + "learning_rate": 2.4520194276718938e-05, + "loss": 0.5104, + "step": 355 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 1.3102362155914307, + "learning_rate": 2.4347380833882634e-05, + "loss": 0.468, + "step": 360 + }, + { + "epoch": 0.9811827956989247, + "grad_norm": 1.1158560514450073, + "learning_rate": 2.417251476415998e-05, + "loss": 0.4552, + "step": 365 + }, + { + "epoch": 0.9946236559139785, + "grad_norm": 1.2462714910507202, + "learning_rate": 2.3995634467055882e-05, + "loss": 0.4806, + "step": 370 + }, + { + "epoch": 1.0080645161290323, + "grad_norm": 1.3468903303146362, + "learning_rate": 2.3816778784387097e-05, + "loss": 0.4277, + "step": 375 + }, + { + "epoch": 1.021505376344086, + "grad_norm": 1.323755145072937, + "learning_rate": 2.363598699175281e-05, + "loss": 0.4019, + "step": 380 + }, + { + "epoch": 1.0349462365591398, + "grad_norm": 1.0861363410949707, + "learning_rate": 2.3453298789909935e-05, + "loss": 0.36, + "step": 385 + }, + { + "epoch": 1.0483870967741935, + "grad_norm": 1.1317058801651, + "learning_rate": 2.3268754296055122e-05, + "loss": 0.3423, + "step": 390 + }, + { + "epoch": 1.0618279569892473, + "grad_norm": 1.1909416913986206, + "learning_rate": 2.3082394035015212e-05, + "loss": 0.4001, + "step": 395 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 1.1991876363754272, + "learning_rate": 2.2894258930348284e-05, + "loss": 0.4239, + "step": 400 + }, + { + "epoch": 1.0887096774193548, + "grad_norm": 1.3869519233703613, + "learning_rate": 2.2704390295357056e-05, + "loss": 0.3315, + "step": 405 + }, + { + "epoch": 1.1021505376344085, + "grad_norm": 1.291604995727539, + "learning_rate": 2.2512829824016773e-05, + "loss": 0.3484, + "step": 410 + }, + { + "epoch": 1.1155913978494623, + "grad_norm": 1.2116373777389526, + "learning_rate": 2.2319619581819458e-05, + "loss": 0.3668, + "step": 415 + }, + { + "epoch": 1.129032258064516, + "grad_norm": 1.2152055501937866, + "learning_rate": 2.2124801996536575e-05, + "loss": 0.3719, + "step": 420 + }, + { + "epoch": 1.14247311827957, + "grad_norm": 1.1148264408111572, + "learning_rate": 2.1928419848902163e-05, + "loss": 0.3465, + "step": 425 + }, + { + "epoch": 1.1559139784946237, + "grad_norm": 1.1606225967407227, + "learning_rate": 2.1730516263218472e-05, + "loss": 0.3121, + "step": 430 + }, + { + "epoch": 1.1693548387096775, + "grad_norm": 1.2450103759765625, + "learning_rate": 2.1531134697886103e-05, + "loss": 0.3168, + "step": 435 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 1.1983078718185425, + "learning_rate": 2.133031893586083e-05, + "loss": 0.305, + "step": 440 + }, + { + "epoch": 1.196236559139785, + "grad_norm": 1.6005736589431763, + "learning_rate": 2.1128113075039124e-05, + "loss": 0.3153, + "step": 445 + }, + { + "epoch": 1.2096774193548387, + "grad_norm": 1.2613251209259033, + "learning_rate": 2.092456151857455e-05, + "loss": 0.3104, + "step": 450 + }, + { + "epoch": 1.2231182795698925, + "grad_norm": 1.4524179697036743, + "learning_rate": 2.0719708965127073e-05, + "loss": 0.3528, + "step": 455 + }, + { + "epoch": 1.2365591397849462, + "grad_norm": 1.2752647399902344, + "learning_rate": 2.0513600399047545e-05, + "loss": 0.3093, + "step": 460 + }, + { + "epoch": 1.25, + "grad_norm": 1.309078574180603, + "learning_rate": 2.0306281080499413e-05, + "loss": 0.3219, + "step": 465 + }, + { + "epoch": 1.2634408602150538, + "grad_norm": 1.271602749824524, + "learning_rate": 2.009779653551983e-05, + "loss": 0.2708, + "step": 470 + }, + { + "epoch": 1.2768817204301075, + "grad_norm": 1.235925555229187, + "learning_rate": 1.9888192546022488e-05, + "loss": 0.3042, + "step": 475 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 1.0537954568862915, + "learning_rate": 1.9677515139744126e-05, + "loss": 0.2657, + "step": 480 + }, + { + "epoch": 1.303763440860215, + "grad_norm": 1.4249722957611084, + "learning_rate": 1.946581058013717e-05, + "loss": 0.2862, + "step": 485 + }, + { + "epoch": 1.3172043010752688, + "grad_norm": 1.0850040912628174, + "learning_rate": 1.9253125356210547e-05, + "loss": 0.2536, + "step": 490 + }, + { + "epoch": 1.3306451612903225, + "grad_norm": 1.3648841381072998, + "learning_rate": 1.903950617232098e-05, + "loss": 0.2768, + "step": 495 + }, + { + "epoch": 1.3440860215053765, + "grad_norm": 1.1673448085784912, + "learning_rate": 1.8824999937917025e-05, + "loss": 0.2736, + "step": 500 + }, + { + "epoch": 1.35752688172043, + "grad_norm": 1.2838027477264404, + "learning_rate": 1.8609653757238026e-05, + "loss": 0.2468, + "step": 505 + }, + { + "epoch": 1.370967741935484, + "grad_norm": 1.0797650814056396, + "learning_rate": 1.8393514918970315e-05, + "loss": 0.2461, + "step": 510 + }, + { + "epoch": 1.3844086021505375, + "grad_norm": 1.2497416734695435, + "learning_rate": 1.8176630885862927e-05, + "loss": 0.2492, + "step": 515 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 1.269156813621521, + "learning_rate": 1.7959049284305056e-05, + "loss": 0.2778, + "step": 520 + }, + { + "epoch": 1.4112903225806452, + "grad_norm": 1.3660997152328491, + "learning_rate": 1.7740817893867613e-05, + "loss": 0.2834, + "step": 525 + }, + { + "epoch": 1.424731182795699, + "grad_norm": 1.234924077987671, + "learning_rate": 1.752198463681111e-05, + "loss": 0.2559, + "step": 530 + }, + { + "epoch": 1.4381720430107527, + "grad_norm": 1.290900707244873, + "learning_rate": 1.7302597567562237e-05, + "loss": 0.2404, + "step": 535 + }, + { + "epoch": 1.4516129032258065, + "grad_norm": 1.1388347148895264, + "learning_rate": 1.7082704862161422e-05, + "loss": 0.2761, + "step": 540 + }, + { + "epoch": 1.4650537634408602, + "grad_norm": 1.1554988622665405, + "learning_rate": 1.6862354807683643e-05, + "loss": 0.2507, + "step": 545 + }, + { + "epoch": 1.478494623655914, + "grad_norm": 1.2063636779785156, + "learning_rate": 1.66415957916349e-05, + "loss": 0.2257, + "step": 550 + }, + { + "epoch": 1.4919354838709677, + "grad_norm": 1.2385138273239136, + "learning_rate": 1.642047629132663e-05, + "loss": 0.2536, + "step": 555 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 1.3115344047546387, + "learning_rate": 1.619904486323037e-05, + "loss": 0.2289, + "step": 560 + }, + { + "epoch": 1.5188172043010753, + "grad_norm": 1.3488812446594238, + "learning_rate": 1.597735013231507e-05, + "loss": 0.2371, + "step": 565 + }, + { + "epoch": 1.532258064516129, + "grad_norm": 1.211215615272522, + "learning_rate": 1.5755440781369345e-05, + "loss": 0.2516, + "step": 570 + }, + { + "epoch": 1.5456989247311828, + "grad_norm": 1.1602976322174072, + "learning_rate": 1.5533365540311038e-05, + "loss": 0.2362, + "step": 575 + }, + { + "epoch": 1.5591397849462365, + "grad_norm": 1.1652741432189941, + "learning_rate": 1.531117317548643e-05, + "loss": 0.2159, + "step": 580 + }, + { + "epoch": 1.5725806451612905, + "grad_norm": 1.211682677268982, + "learning_rate": 1.5088912478961458e-05, + "loss": 0.2465, + "step": 585 + }, + { + "epoch": 1.586021505376344, + "grad_norm": 1.034642219543457, + "learning_rate": 1.4866632257807278e-05, + "loss": 0.231, + "step": 590 + }, + { + "epoch": 1.599462365591398, + "grad_norm": 1.238311767578125, + "learning_rate": 1.4644381323382539e-05, + "loss": 0.1905, + "step": 595 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 1.0183266401290894, + "learning_rate": 1.4422208480614684e-05, + "loss": 0.2036, + "step": 600 + }, + { + "epoch": 1.6263440860215055, + "grad_norm": 1.2637821435928345, + "learning_rate": 1.4200162517282736e-05, + "loss": 0.1899, + "step": 605 + }, + { + "epoch": 1.639784946236559, + "grad_norm": 1.2578243017196655, + "learning_rate": 1.3978292193303768e-05, + "loss": 0.2111, + "step": 610 + }, + { + "epoch": 1.653225806451613, + "grad_norm": 1.3745654821395874, + "learning_rate": 1.3756646230025555e-05, + "loss": 0.1833, + "step": 615 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 1.234005093574524, + "learning_rate": 1.3535273299527622e-05, + "loss": 0.1969, + "step": 620 + }, + { + "epoch": 1.6801075268817205, + "grad_norm": 1.20147705078125, + "learning_rate": 1.3314222013933218e-05, + "loss": 0.1995, + "step": 625 + }, + { + "epoch": 1.6935483870967742, + "grad_norm": 1.1531866788864136, + "learning_rate": 1.3093540914734351e-05, + "loss": 0.204, + "step": 630 + }, + { + "epoch": 1.706989247311828, + "grad_norm": 1.2209144830703735, + "learning_rate": 1.2873278462132401e-05, + "loss": 0.2097, + "step": 635 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 1.342939853668213, + "learning_rate": 1.2653483024396535e-05, + "loss": 0.1855, + "step": 640 + }, + { + "epoch": 1.7338709677419355, + "grad_norm": 1.3685388565063477, + "learning_rate": 1.2434202867242372e-05, + "loss": 0.1865, + "step": 645 + }, + { + "epoch": 1.7473118279569892, + "grad_norm": 1.0821951627731323, + "learning_rate": 1.221548614323308e-05, + "loss": 0.1804, + "step": 650 + }, + { + "epoch": 1.760752688172043, + "grad_norm": 1.1324962377548218, + "learning_rate": 1.1997380881205431e-05, + "loss": 0.1775, + "step": 655 + }, + { + "epoch": 1.7741935483870968, + "grad_norm": 1.2811193466186523, + "learning_rate": 1.1779934975722919e-05, + "loss": 0.1908, + "step": 660 + }, + { + "epoch": 1.7876344086021505, + "grad_norm": 1.4585835933685303, + "learning_rate": 1.1563196176558436e-05, + "loss": 0.2007, + "step": 665 + }, + { + "epoch": 1.8010752688172043, + "grad_norm": 1.1551220417022705, + "learning_rate": 1.13472120782087e-05, + "loss": 0.2048, + "step": 670 + }, + { + "epoch": 1.814516129032258, + "grad_norm": 1.1691032648086548, + "learning_rate": 1.1132030109442823e-05, + "loss": 0.1992, + "step": 675 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 1.0334722995758057, + "learning_rate": 1.0917697522887217e-05, + "loss": 0.147, + "step": 680 + }, + { + "epoch": 1.8413978494623655, + "grad_norm": 1.0444282293319702, + "learning_rate": 1.0704261384649242e-05, + "loss": 0.1508, + "step": 685 + }, + { + "epoch": 1.8548387096774195, + "grad_norm": 1.212236762046814, + "learning_rate": 1.0491768563981747e-05, + "loss": 0.1823, + "step": 690 + }, + { + "epoch": 1.868279569892473, + "grad_norm": 1.0465245246887207, + "learning_rate": 1.0280265722990908e-05, + "loss": 0.1403, + "step": 695 + }, + { + "epoch": 1.881720430107527, + "grad_norm": 1.11671781539917, + "learning_rate": 1.0069799306389485e-05, + "loss": 0.1683, + "step": 700 + }, + { + "epoch": 1.8951612903225805, + "grad_norm": 1.007764220237732, + "learning_rate": 9.860415531297881e-06, + "loss": 0.1546, + "step": 705 + }, + { + "epoch": 1.9086021505376345, + "grad_norm": 1.187238097190857, + "learning_rate": 9.652160377095124e-06, + "loss": 0.1705, + "step": 710 + }, + { + "epoch": 1.922043010752688, + "grad_norm": 1.1675169467926025, + "learning_rate": 9.445079575322101e-06, + "loss": 0.1608, + "step": 715 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 1.2236980199813843, + "learning_rate": 9.239218599639171e-06, + "loss": 0.1263, + "step": 720 + }, + { + "epoch": 1.9489247311827957, + "grad_norm": 0.893860936164856, + "learning_rate": 9.03462265584046e-06, + "loss": 0.128, + "step": 725 + }, + { + "epoch": 1.9623655913978495, + "grad_norm": 0.9936382174491882, + "learning_rate": 8.831336671926924e-06, + "loss": 0.1551, + "step": 730 + }, + { + "epoch": 1.9758064516129032, + "grad_norm": 1.268470287322998, + "learning_rate": 8.629405288240461e-06, + "loss": 0.1314, + "step": 735 + }, + { + "epoch": 1.989247311827957, + "grad_norm": 0.9592378735542297, + "learning_rate": 8.428872847661139e-06, + "loss": 0.1701, + "step": 740 + }, + { + "epoch": 2.002688172043011, + "grad_norm": 0.9434704184532166, + "learning_rate": 8.229783385869807e-06, + "loss": 0.1549, + "step": 745 + }, + { + "epoch": 2.0161290322580645, + "grad_norm": 1.146842122077942, + "learning_rate": 8.03218062167811e-06, + "loss": 0.1214, + "step": 750 + }, + { + "epoch": 2.0295698924731185, + "grad_norm": 0.9829095602035522, + "learning_rate": 7.836107947428115e-06, + "loss": 0.1165, + "step": 755 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.998819887638092, + "learning_rate": 7.641608419463621e-06, + "loss": 0.1073, + "step": 760 + }, + { + "epoch": 2.056451612903226, + "grad_norm": 1.1372114419937134, + "learning_rate": 7.448724748675273e-06, + "loss": 0.105, + "step": 765 + }, + { + "epoch": 2.0698924731182795, + "grad_norm": 0.8414438962936401, + "learning_rate": 7.257499291121473e-06, + "loss": 0.1235, + "step": 770 + }, + { + "epoch": 2.0833333333333335, + "grad_norm": 1.0400238037109375, + "learning_rate": 7.0679740387272896e-06, + "loss": 0.1119, + "step": 775 + }, + { + "epoch": 2.096774193548387, + "grad_norm": 0.787933886051178, + "learning_rate": 6.880190610063272e-06, + "loss": 0.1099, + "step": 780 + }, + { + "epoch": 2.110215053763441, + "grad_norm": 0.7803722620010376, + "learning_rate": 6.694190241206277e-06, + "loss": 0.1118, + "step": 785 + }, + { + "epoch": 2.1236559139784945, + "grad_norm": 1.0177898406982422, + "learning_rate": 6.510013776684281e-06, + "loss": 0.1327, + "step": 790 + }, + { + "epoch": 2.1370967741935485, + "grad_norm": 1.001783847808838, + "learning_rate": 6.327701660507191e-06, + "loss": 0.0947, + "step": 795 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.8777228593826294, + "learning_rate": 6.147293927285537e-06, + "loss": 0.0922, + "step": 800 + }, + { + "epoch": 2.163978494623656, + "grad_norm": 1.0548250675201416, + "learning_rate": 5.968830193439195e-06, + "loss": 0.0943, + "step": 805 + }, + { + "epoch": 2.1774193548387095, + "grad_norm": 1.046064853668213, + "learning_rate": 5.7923496484978115e-06, + "loss": 0.0873, + "step": 810 + }, + { + "epoch": 2.1908602150537635, + "grad_norm": 0.8252707123756409, + "learning_rate": 5.617891046495046e-06, + "loss": 0.1068, + "step": 815 + }, + { + "epoch": 2.204301075268817, + "grad_norm": 0.9281523823738098, + "learning_rate": 5.44549269745842e-06, + "loss": 0.0925, + "step": 820 + }, + { + "epoch": 2.217741935483871, + "grad_norm": 0.9296122193336487, + "learning_rate": 5.275192458996682e-06, + "loss": 0.102, + "step": 825 + }, + { + "epoch": 2.2311827956989245, + "grad_norm": 0.8919433355331421, + "learning_rate": 5.1070277279864926e-06, + "loss": 0.089, + "step": 830 + }, + { + "epoch": 2.2446236559139785, + "grad_norm": 0.828461229801178, + "learning_rate": 4.941035432360333e-06, + "loss": 0.0931, + "step": 835 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.8438311219215393, + "learning_rate": 4.777252022997327e-06, + "loss": 0.1052, + "step": 840 + }, + { + "epoch": 2.271505376344086, + "grad_norm": 0.794101357460022, + "learning_rate": 4.615713465718867e-06, + "loss": 0.1038, + "step": 845 + }, + { + "epoch": 2.28494623655914, + "grad_norm": 1.0882279872894287, + "learning_rate": 4.456455233390751e-06, + "loss": 0.0835, + "step": 850 + }, + { + "epoch": 2.2983870967741935, + "grad_norm": 0.816003143787384, + "learning_rate": 4.299512298133546e-06, + "loss": 0.0919, + "step": 855 + }, + { + "epoch": 2.3118279569892475, + "grad_norm": 0.7224701642990112, + "learning_rate": 4.144919123642947e-06, + "loss": 0.0981, + "step": 860 + }, + { + "epoch": 2.325268817204301, + "grad_norm": 0.828407883644104, + "learning_rate": 3.992709657621739e-06, + "loss": 0.0981, + "step": 865 + }, + { + "epoch": 2.338709677419355, + "grad_norm": 0.9839264750480652, + "learning_rate": 3.842917324325107e-06, + "loss": 0.1012, + "step": 870 + }, + { + "epoch": 2.3521505376344085, + "grad_norm": 0.9260988235473633, + "learning_rate": 3.6955750172208763e-06, + "loss": 0.0998, + "step": 875 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 0.9718059301376343, + "learning_rate": 3.550715091766307e-06, + "loss": 0.0979, + "step": 880 + }, + { + "epoch": 2.379032258064516, + "grad_norm": 0.7623013854026794, + "learning_rate": 3.4083693583030306e-06, + "loss": 0.0992, + "step": 885 + }, + { + "epoch": 2.39247311827957, + "grad_norm": 0.7972791194915771, + "learning_rate": 3.268569075071722e-06, + "loss": 0.0893, + "step": 890 + }, + { + "epoch": 2.4059139784946235, + "grad_norm": 0.8092714548110962, + "learning_rate": 3.131344941347937e-06, + "loss": 0.112, + "step": 895 + }, + { + "epoch": 2.4193548387096775, + "grad_norm": 0.710240364074707, + "learning_rate": 2.996727090700794e-06, + "loss": 0.0867, + "step": 900 + }, + { + "epoch": 2.432795698924731, + "grad_norm": 0.8229997754096985, + "learning_rate": 2.86474508437579e-06, + "loss": 0.0852, + "step": 905 + }, + { + "epoch": 2.446236559139785, + "grad_norm": 0.8272901177406311, + "learning_rate": 2.7354279048033524e-06, + "loss": 0.0919, + "step": 910 + }, + { + "epoch": 2.4596774193548385, + "grad_norm": 0.7493805289268494, + "learning_rate": 2.6088039492344707e-06, + "loss": 0.0943, + "step": 915 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.7813656330108643, + "learning_rate": 2.4849010235048513e-06, + "loss": 0.0934, + "step": 920 + }, + { + "epoch": 2.486559139784946, + "grad_norm": 0.792603611946106, + "learning_rate": 2.3637463359288914e-06, + "loss": 0.0944, + "step": 925 + }, + { + "epoch": 2.5, + "grad_norm": 0.7746173739433289, + "learning_rate": 2.2453664913249505e-06, + "loss": 0.097, + "step": 930 + }, + { + "epoch": 2.513440860215054, + "grad_norm": 0.8174401521682739, + "learning_rate": 2.1297874851730575e-06, + "loss": 0.0834, + "step": 935 + }, + { + "epoch": 2.5268817204301075, + "grad_norm": 0.9357252717018127, + "learning_rate": 2.017034697906489e-06, + "loss": 0.0886, + "step": 940 + }, + { + "epoch": 2.540322580645161, + "grad_norm": 1.0120594501495361, + "learning_rate": 1.9071328893383667e-06, + "loss": 0.1002, + "step": 945 + }, + { + "epoch": 2.553763440860215, + "grad_norm": 0.6979785561561584, + "learning_rate": 1.8001061932245654e-06, + "loss": 0.0931, + "step": 950 + }, + { + "epoch": 2.567204301075269, + "grad_norm": 0.776948094367981, + "learning_rate": 1.6959781119640894e-06, + "loss": 0.104, + "step": 955 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.7914796471595764, + "learning_rate": 1.594771511438095e-06, + "loss": 0.0857, + "step": 960 + }, + { + "epoch": 2.5940860215053765, + "grad_norm": 0.7806062698364258, + "learning_rate": 1.4965086159886694e-06, + "loss": 0.0669, + "step": 965 + }, + { + "epoch": 2.60752688172043, + "grad_norm": 0.7870782017707825, + "learning_rate": 1.401211003538519e-06, + "loss": 0.0991, + "step": 970 + }, + { + "epoch": 2.620967741935484, + "grad_norm": 0.6584269404411316, + "learning_rate": 1.308899600852585e-06, + "loss": 0.0792, + "step": 975 + }, + { + "epoch": 2.6344086021505375, + "grad_norm": 0.6224702000617981, + "learning_rate": 1.2195946789426531e-06, + "loss": 0.0775, + "step": 980 + }, + { + "epoch": 2.6478494623655915, + "grad_norm": 0.8655925393104553, + "learning_rate": 1.13331584861597e-06, + "loss": 0.0944, + "step": 985 + }, + { + "epoch": 2.661290322580645, + "grad_norm": 0.9402522444725037, + "learning_rate": 1.0500820561688374e-06, + "loss": 0.089, + "step": 990 + }, + { + "epoch": 2.674731182795699, + "grad_norm": 0.8272058367729187, + "learning_rate": 9.699115792260953e-07, + "loss": 0.0857, + "step": 995 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.6829291582107544, + "learning_rate": 8.928220227275086e-07, + "loss": 0.082, + "step": 1000 + }, + { + "epoch": 2.7016129032258065, + "grad_norm": 0.7164384126663208, + "learning_rate": 8.18830315061801e-07, + "loss": 0.0905, + "step": 1005 + }, + { + "epoch": 2.71505376344086, + "grad_norm": 0.6525943875312805, + "learning_rate": 7.479527043492984e-07, + "loss": 0.0786, + "step": 1010 + }, + { + "epoch": 2.728494623655914, + "grad_norm": 0.7131579518318176, + "learning_rate": 6.802047548739409e-07, + "loss": 0.0813, + "step": 1015 + }, + { + "epoch": 2.741935483870968, + "grad_norm": 0.6259737014770508, + "learning_rate": 6.156013436654617e-07, + "loss": 0.0732, + "step": 1020 + }, + { + "epoch": 2.7553763440860215, + "grad_norm": 0.6460631489753723, + "learning_rate": 5.541566572324786e-07, + "loss": 0.0835, + "step": 1025 + }, + { + "epoch": 2.768817204301075, + "grad_norm": 0.7906473278999329, + "learning_rate": 4.958841884472409e-07, + "loss": 0.0923, + "step": 1030 + }, + { + "epoch": 2.782258064516129, + "grad_norm": 0.6076737642288208, + "learning_rate": 4.407967335826585e-07, + "loss": 0.0782, + "step": 1035 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.6636131405830383, + "learning_rate": 3.889063895023287e-07, + "loss": 0.0776, + "step": 1040 + }, + { + "epoch": 2.8091397849462365, + "grad_norm": 0.739325225353241, + "learning_rate": 3.4022455100414184e-07, + "loss": 0.1003, + "step": 1045 + }, + { + "epoch": 2.8225806451612905, + "grad_norm": 0.6619266867637634, + "learning_rate": 2.947619083180525e-07, + "loss": 0.0822, + "step": 1050 + }, + { + "epoch": 2.836021505376344, + "grad_norm": 0.6163301467895508, + "learning_rate": 2.5252844475856906e-07, + "loss": 0.0703, + "step": 1055 + }, + { + "epoch": 2.849462365591398, + "grad_norm": 0.7203219532966614, + "learning_rate": 2.1353343453248408e-07, + "loss": 0.0786, + "step": 1060 + }, + { + "epoch": 2.8629032258064515, + "grad_norm": 0.6186914443969727, + "learning_rate": 1.777854407023083e-07, + "loss": 0.0761, + "step": 1065 + }, + { + "epoch": 2.8763440860215055, + "grad_norm": 0.8017592430114746, + "learning_rate": 1.4529231330588644e-07, + "loss": 0.0894, + "step": 1070 + }, + { + "epoch": 2.889784946236559, + "grad_norm": 0.7173402309417725, + "learning_rate": 1.160611876325679e-07, + "loss": 0.081, + "step": 1075 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.7164760231971741, + "learning_rate": 9.009848265634669e-08, + "loss": 0.0802, + "step": 1080 + }, + { + "epoch": 2.9166666666666665, + "grad_norm": 0.5841734409332275, + "learning_rate": 6.740989962628896e-08, + "loss": 0.0806, + "step": 1085 + }, + { + "epoch": 2.9301075268817205, + "grad_norm": 0.6109783053398132, + "learning_rate": 4.800042081458456e-08, + "loss": 0.0768, + "step": 1090 + }, + { + "epoch": 2.943548387096774, + "grad_norm": 0.6812704205513, + "learning_rate": 3.187430842245709e-08, + "loss": 0.0882, + "step": 1095 + }, + { + "epoch": 2.956989247311828, + "grad_norm": 0.7498748898506165, + "learning_rate": 1.9035103644222675e-08, + "loss": 0.0837, + "step": 1100 + }, + { + "epoch": 2.970430107526882, + "grad_norm": 0.6384299993515015, + "learning_rate": 9.485625889660288e-09, + "loss": 0.0795, + "step": 1105 + }, + { + "epoch": 2.9838709677419355, + "grad_norm": 0.7052260041236877, + "learning_rate": 3.2279721648870162e-09, + "loss": 0.0766, + "step": 1110 + }, + { + "epoch": 2.997311827956989, + "grad_norm": 0.5928745269775391, + "learning_rate": 2.6351661186974074e-10, + "loss": 0.0815, + "step": 1115 + }, + { + "epoch": 3.0, + "step": 1116, + "total_flos": 1.4247666007911956e+18, + "train_loss": 0.4142158398785258, + "train_runtime": 628.0444, + "train_samples_per_second": 56.834, + "train_steps_per_second": 1.777 + } + ], + "logging_steps": 5, + "max_steps": 1116, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4247666007911956e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..958c970760bd4288b81050ef2abc6643a0644069 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/13_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b0bc5c5672be2c7662951ff41eff919a14df4467961b986db250a8fe225151 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..529fc0eba9493cfed2d3c11a6fbbce2294fbfe51 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 14_128_e3_3e-5 + results: [] +--- + + + +# 14_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..336b3e9665ec6c4c7b5e84e972bf7a1ed3881cf8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "o_proj", + "down_proj", + "k_proj", + "up_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d8fc96c49b10849174970c8f400a65442fc017c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f958fdcb44ca6756367002a53f063bdde02ed2b58ada4e2cd28ad1d4760791d9 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c95e0c1d1975b99b791447fcf0725f5fed992da8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5061941455452897e+18, + "train_loss": 0.4424345256436093, + "train_runtime": 641.9077, + "train_samples": 12598, + "train_samples_per_second": 58.878, + "train_steps_per_second": 1.841 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c95e0c1d1975b99b791447fcf0725f5fed992da8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5061941455452897e+18, + "train_loss": 0.4424345256436093, + "train_runtime": 641.9077, + "train_samples": 12598, + "train_samples_per_second": 58.878, + "train_steps_per_second": 1.841 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..56edf1ad14a6fc0d12485a2ee946688cacc6598e --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1695 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1182, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012690355329949238, + "grad_norm": 0.6320702433586121, + "learning_rate": 2e-06, + "loss": 1.6129, + "step": 5 + }, + { + "epoch": 0.025380710659898477, + "grad_norm": 0.5641027092933655, + "learning_rate": 4.5e-06, + "loss": 1.611, + "step": 10 + }, + { + "epoch": 0.03807106598984772, + "grad_norm": 0.5345904231071472, + "learning_rate": 7e-06, + "loss": 1.6137, + "step": 15 + }, + { + "epoch": 0.050761421319796954, + "grad_norm": 0.5349202752113342, + "learning_rate": 9.5e-06, + "loss": 1.6139, + "step": 20 + }, + { + "epoch": 0.06345177664974619, + "grad_norm": 0.5212976932525635, + "learning_rate": 1.2e-05, + "loss": 1.6053, + "step": 25 + }, + { + "epoch": 0.07614213197969544, + "grad_norm": 0.500096321105957, + "learning_rate": 1.45e-05, + "loss": 1.5184, + "step": 30 + }, + { + "epoch": 0.08883248730964467, + "grad_norm": 0.5094286203384399, + "learning_rate": 1.7e-05, + "loss": 1.5791, + "step": 35 + }, + { + "epoch": 0.10152284263959391, + "grad_norm": 0.4969731569290161, + "learning_rate": 1.95e-05, + "loss": 1.5535, + "step": 40 + }, + { + "epoch": 0.11421319796954314, + "grad_norm": 0.5405482053756714, + "learning_rate": 2.2e-05, + "loss": 1.4941, + "step": 45 + }, + { + "epoch": 0.12690355329949238, + "grad_norm": 0.4579775929450989, + "learning_rate": 2.45e-05, + "loss": 1.4967, + "step": 50 + }, + { + "epoch": 0.13959390862944163, + "grad_norm": 0.556422770023346, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.4771, + "step": 55 + }, + { + "epoch": 0.15228426395939088, + "grad_norm": 0.5392996072769165, + "learning_rate": 2.95e-05, + "loss": 1.376, + "step": 60 + }, + { + "epoch": 0.1649746192893401, + "grad_norm": 0.6179820895195007, + "learning_rate": 2.9999059214234348e-05, + "loss": 1.4203, + "step": 65 + }, + { + "epoch": 0.17766497461928935, + "grad_norm": 0.5724902749061584, + "learning_rate": 2.99952374743146e-05, + "loss": 1.3668, + "step": 70 + }, + { + "epoch": 0.19035532994923857, + "grad_norm": 0.5966415405273438, + "learning_rate": 2.9988476729599466e-05, + "loss": 1.3384, + "step": 75 + }, + { + "epoch": 0.20304568527918782, + "grad_norm": 0.6159475445747375, + "learning_rate": 2.9978778305167067e-05, + "loss": 1.4023, + "step": 80 + }, + { + "epoch": 0.21573604060913706, + "grad_norm": 0.6238250136375427, + "learning_rate": 2.9966144101868642e-05, + "loss": 1.2781, + "step": 85 + }, + { + "epoch": 0.22842639593908629, + "grad_norm": 0.6771887540817261, + "learning_rate": 2.995057659595603e-05, + "loss": 1.3166, + "step": 90 + }, + { + "epoch": 0.24111675126903553, + "grad_norm": 0.6870189905166626, + "learning_rate": 2.993207883859627e-05, + "loss": 1.2783, + "step": 95 + }, + { + "epoch": 0.25380710659898476, + "grad_norm": 0.6103479266166687, + "learning_rate": 2.991065445527364e-05, + "loss": 1.2548, + "step": 100 + }, + { + "epoch": 0.26649746192893403, + "grad_norm": 0.6876526474952698, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.1997, + "step": 105 + }, + { + "epoch": 0.27918781725888325, + "grad_norm": 0.74831622838974, + "learning_rate": 2.985904317988701e-05, + "loss": 1.2218, + "step": 110 + }, + { + "epoch": 0.2918781725888325, + "grad_norm": 0.7153927087783813, + "learning_rate": 2.982886640342046e-05, + "loss": 1.1608, + "step": 115 + }, + { + "epoch": 0.30456852791878175, + "grad_norm": 0.7671074271202087, + "learning_rate": 2.9795783230203305e-05, + "loss": 1.1676, + "step": 120 + }, + { + "epoch": 0.31725888324873097, + "grad_norm": 0.8270482420921326, + "learning_rate": 2.9759800144401265e-05, + "loss": 1.1059, + "step": 125 + }, + { + "epoch": 0.3299492385786802, + "grad_norm": 0.8771117925643921, + "learning_rate": 2.9720924198550978e-05, + "loss": 1.0887, + "step": 130 + }, + { + "epoch": 0.3426395939086294, + "grad_norm": 0.8258090615272522, + "learning_rate": 2.967916301217774e-05, + "loss": 1.0421, + "step": 135 + }, + { + "epoch": 0.3553299492385787, + "grad_norm": 0.9184567928314209, + "learning_rate": 2.9634524770302118e-05, + "loss": 1.0497, + "step": 140 + }, + { + "epoch": 0.3680203045685279, + "grad_norm": 0.8160356283187866, + "learning_rate": 2.9587018221835692e-05, + "loss": 1.0693, + "step": 145 + }, + { + "epoch": 0.38071065989847713, + "grad_norm": 0.8382415771484375, + "learning_rate": 2.953665267786634e-05, + "loss": 1.0154, + "step": 150 + }, + { + "epoch": 0.3934010152284264, + "grad_norm": 1.0416936874389648, + "learning_rate": 2.9483438009833267e-05, + "loss": 0.9825, + "step": 155 + }, + { + "epoch": 0.40609137055837563, + "grad_norm": 0.9587664604187012, + "learning_rate": 2.942738464759229e-05, + "loss": 1.0235, + "step": 160 + }, + { + "epoch": 0.41878172588832485, + "grad_norm": 1.0197511911392212, + "learning_rate": 2.9368503577371562e-05, + "loss": 0.9228, + "step": 165 + }, + { + "epoch": 0.43147208121827413, + "grad_norm": 0.9349152445793152, + "learning_rate": 2.9306806339618394e-05, + "loss": 0.9232, + "step": 170 + }, + { + "epoch": 0.44416243654822335, + "grad_norm": 0.9438455700874329, + "learning_rate": 2.9242305026737313e-05, + "loss": 0.8975, + "step": 175 + }, + { + "epoch": 0.45685279187817257, + "grad_norm": 0.9188277721405029, + "learning_rate": 2.9175012280720024e-05, + "loss": 1.0045, + "step": 180 + }, + { + "epoch": 0.46954314720812185, + "grad_norm": 0.948183536529541, + "learning_rate": 2.910494129066766e-05, + "loss": 0.9773, + "step": 185 + }, + { + "epoch": 0.48223350253807107, + "grad_norm": 0.99471515417099, + "learning_rate": 2.9032105790205696e-05, + "loss": 0.8527, + "step": 190 + }, + { + "epoch": 0.4949238578680203, + "grad_norm": 0.9482126832008362, + "learning_rate": 2.8956520054792306e-05, + "loss": 0.877, + "step": 195 + }, + { + "epoch": 0.5076142131979695, + "grad_norm": 0.9606196880340576, + "learning_rate": 2.887819889892037e-05, + "loss": 0.8265, + "step": 200 + }, + { + "epoch": 0.5203045685279187, + "grad_norm": 0.9572938084602356, + "learning_rate": 2.8797157673213918e-05, + "loss": 0.7821, + "step": 205 + }, + { + "epoch": 0.5329949238578681, + "grad_norm": 1.4253623485565186, + "learning_rate": 2.871341226141946e-05, + "loss": 0.8203, + "step": 210 + }, + { + "epoch": 0.5456852791878173, + "grad_norm": 1.276092290878296, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.8612, + "step": 215 + }, + { + "epoch": 0.5583756345177665, + "grad_norm": 1.040412187576294, + "learning_rate": 2.853787506138226e-05, + "loss": 0.8031, + "step": 220 + }, + { + "epoch": 0.5710659898477157, + "grad_norm": 1.1418441534042358, + "learning_rate": 2.844611767770787e-05, + "loss": 0.7302, + "step": 225 + }, + { + "epoch": 0.583756345177665, + "grad_norm": 1.0752582550048828, + "learning_rate": 2.835172491033903e-05, + "loss": 0.7589, + "step": 230 + }, + { + "epoch": 0.5964467005076142, + "grad_norm": 1.133285403251648, + "learning_rate": 2.825471525986945e-05, + "loss": 0.7985, + "step": 235 + }, + { + "epoch": 0.6091370558375635, + "grad_norm": 1.1253451108932495, + "learning_rate": 2.815510773979113e-05, + "loss": 0.7038, + "step": 240 + }, + { + "epoch": 0.6218274111675127, + "grad_norm": 1.1528375148773193, + "learning_rate": 2.8052921872767832e-05, + "loss": 0.7385, + "step": 245 + }, + { + "epoch": 0.6345177664974619, + "grad_norm": 1.222985863685608, + "learning_rate": 2.7948177686808695e-05, + "loss": 0.7343, + "step": 250 + }, + { + "epoch": 0.6472081218274112, + "grad_norm": 1.1594884395599365, + "learning_rate": 2.7840895711342836e-05, + "loss": 0.717, + "step": 255 + }, + { + "epoch": 0.6598984771573604, + "grad_norm": 1.156617283821106, + "learning_rate": 2.7731096973195664e-05, + "loss": 0.6881, + "step": 260 + }, + { + "epoch": 0.6725888324873096, + "grad_norm": 1.0922139883041382, + "learning_rate": 2.7618802992467718e-05, + "loss": 0.6807, + "step": 265 + }, + { + "epoch": 0.6852791878172588, + "grad_norm": 1.2307840585708618, + "learning_rate": 2.750403577831679e-05, + "loss": 0.6953, + "step": 270 + }, + { + "epoch": 0.6979695431472082, + "grad_norm": 1.1639469861984253, + "learning_rate": 2.738681782464426e-05, + "loss": 0.685, + "step": 275 + }, + { + "epoch": 0.7106598984771574, + "grad_norm": 1.2265640497207642, + "learning_rate": 2.726717210568634e-05, + "loss": 0.6907, + "step": 280 + }, + { + "epoch": 0.7233502538071066, + "grad_norm": 1.3260412216186523, + "learning_rate": 2.7145122071511253e-05, + "loss": 0.6045, + "step": 285 + }, + { + "epoch": 0.7360406091370558, + "grad_norm": 1.0795878171920776, + "learning_rate": 2.7020691643423105e-05, + "loss": 0.6954, + "step": 290 + }, + { + "epoch": 0.748730964467005, + "grad_norm": 1.1395729780197144, + "learning_rate": 2.6893905209273405e-05, + "loss": 0.6334, + "step": 295 + }, + { + "epoch": 0.7614213197969543, + "grad_norm": 1.2655283212661743, + "learning_rate": 2.676478761868114e-05, + "loss": 0.6138, + "step": 300 + }, + { + "epoch": 0.7741116751269036, + "grad_norm": 1.2068722248077393, + "learning_rate": 2.663336417816238e-05, + "loss": 0.6542, + "step": 305 + }, + { + "epoch": 0.7868020304568528, + "grad_norm": 1.1395319700241089, + "learning_rate": 2.649966064617025e-05, + "loss": 0.6154, + "step": 310 + }, + { + "epoch": 0.799492385786802, + "grad_norm": 1.2574084997177124, + "learning_rate": 2.6363703228046455e-05, + "loss": 0.662, + "step": 315 + }, + { + "epoch": 0.8121827411167513, + "grad_norm": 1.2823418378829956, + "learning_rate": 2.6225518570885083e-05, + "loss": 0.6547, + "step": 320 + }, + { + "epoch": 0.8248730964467005, + "grad_norm": 1.216848611831665, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.5962, + "step": 325 + }, + { + "epoch": 0.8375634517766497, + "grad_norm": 1.2274343967437744, + "learning_rate": 2.5942576305166044e-05, + "loss": 0.5913, + "step": 330 + }, + { + "epoch": 0.850253807106599, + "grad_norm": 1.1890133619308472, + "learning_rate": 2.5797874152127325e-05, + "loss": 0.6205, + "step": 335 + }, + { + "epoch": 0.8629441624365483, + "grad_norm": 1.2766979932785034, + "learning_rate": 2.5651055660219847e-05, + "loss": 0.5666, + "step": 340 + }, + { + "epoch": 0.8756345177664975, + "grad_norm": 1.1462724208831787, + "learning_rate": 2.550214960526344e-05, + "loss": 0.5007, + "step": 345 + }, + { + "epoch": 0.8883248730964467, + "grad_norm": 1.2738587856292725, + "learning_rate": 2.5351185172231683e-05, + "loss": 0.5347, + "step": 350 + }, + { + "epoch": 0.9010152284263959, + "grad_norm": 1.2429465055465698, + "learning_rate": 2.5198191949531785e-05, + "loss": 0.5392, + "step": 355 + }, + { + "epoch": 0.9137055837563451, + "grad_norm": 1.1485679149627686, + "learning_rate": 2.5043199923205382e-05, + "loss": 0.5291, + "step": 360 + }, + { + "epoch": 0.9263959390862944, + "grad_norm": 1.3644722700119019, + "learning_rate": 2.488623947105138e-05, + "loss": 0.5307, + "step": 365 + }, + { + "epoch": 0.9390862944162437, + "grad_norm": 1.2410153150558472, + "learning_rate": 2.4727341356672057e-05, + "loss": 0.5067, + "step": 370 + }, + { + "epoch": 0.9517766497461929, + "grad_norm": 1.2398045063018799, + "learning_rate": 2.4566536723443486e-05, + "loss": 0.5493, + "step": 375 + }, + { + "epoch": 0.9644670050761421, + "grad_norm": 1.2930864095687866, + "learning_rate": 2.44038570884116e-05, + "loss": 0.5117, + "step": 380 + }, + { + "epoch": 0.9771573604060914, + "grad_norm": 1.2569669485092163, + "learning_rate": 2.4239334336114955e-05, + "loss": 0.4835, + "step": 385 + }, + { + "epoch": 0.9898477157360406, + "grad_norm": 1.4114385843276978, + "learning_rate": 2.4073000712335516e-05, + "loss": 0.4902, + "step": 390 + }, + { + "epoch": 1.00253807106599, + "grad_norm": 1.2238043546676636, + "learning_rate": 2.3904888817778583e-05, + "loss": 0.4899, + "step": 395 + }, + { + "epoch": 1.015228426395939, + "grad_norm": 1.600306510925293, + "learning_rate": 2.373503160168321e-05, + "loss": 0.4027, + "step": 400 + }, + { + "epoch": 1.0279187817258884, + "grad_norm": 1.1433216333389282, + "learning_rate": 2.35634623553643e-05, + "loss": 0.4022, + "step": 405 + }, + { + "epoch": 1.0406091370558375, + "grad_norm": 1.266970157623291, + "learning_rate": 2.3390214705687607e-05, + "loss": 0.4078, + "step": 410 + }, + { + "epoch": 1.0532994923857868, + "grad_norm": 1.4400715827941895, + "learning_rate": 2.321532260847905e-05, + "loss": 0.4342, + "step": 415 + }, + { + "epoch": 1.0659898477157361, + "grad_norm": 1.246975302696228, + "learning_rate": 2.3038820341869486e-05, + "loss": 0.4459, + "step": 420 + }, + { + "epoch": 1.0786802030456852, + "grad_norm": 1.529812216758728, + "learning_rate": 2.286074249957634e-05, + "loss": 0.386, + "step": 425 + }, + { + "epoch": 1.0913705583756346, + "grad_norm": 1.1796157360076904, + "learning_rate": 2.2681123984123388e-05, + "loss": 0.3992, + "step": 430 + }, + { + "epoch": 1.1040609137055837, + "grad_norm": 1.5209823846817017, + "learning_rate": 2.25e-05, + "loss": 0.3887, + "step": 435 + }, + { + "epoch": 1.116751269035533, + "grad_norm": 1.1977635622024536, + "learning_rate": 2.231740604676121e-05, + "loss": 0.3252, + "step": 440 + }, + { + "epoch": 1.1294416243654823, + "grad_norm": 1.3315677642822266, + "learning_rate": 2.2133377912069934e-05, + "loss": 0.3629, + "step": 445 + }, + { + "epoch": 1.1421319796954315, + "grad_norm": 1.1735678911209106, + "learning_rate": 2.194795166468275e-05, + "loss": 0.3397, + "step": 450 + }, + { + "epoch": 1.1548223350253808, + "grad_norm": 1.2150359153747559, + "learning_rate": 2.1761163647380583e-05, + "loss": 0.3447, + "step": 455 + }, + { + "epoch": 1.16751269035533, + "grad_norm": 1.1495888233184814, + "learning_rate": 2.157305046984563e-05, + "loss": 0.3709, + "step": 460 + }, + { + "epoch": 1.1802030456852792, + "grad_norm": 1.2158557176589966, + "learning_rate": 2.1383649001486055e-05, + "loss": 0.3864, + "step": 465 + }, + { + "epoch": 1.1928934010152283, + "grad_norm": 1.1801280975341797, + "learning_rate": 2.1192996364209708e-05, + "loss": 0.3685, + "step": 470 + }, + { + "epoch": 1.2055837563451777, + "grad_norm": 1.5606462955474854, + "learning_rate": 2.10011299251484e-05, + "loss": 0.3628, + "step": 475 + }, + { + "epoch": 1.218274111675127, + "grad_norm": 1.3210211992263794, + "learning_rate": 2.080808728933409e-05, + "loss": 0.3525, + "step": 480 + }, + { + "epoch": 1.2309644670050761, + "grad_norm": 1.3887970447540283, + "learning_rate": 2.061390629232846e-05, + "loss": 0.3425, + "step": 485 + }, + { + "epoch": 1.2436548223350254, + "grad_norm": 1.1919697523117065, + "learning_rate": 2.0418624992807297e-05, + "loss": 0.3831, + "step": 490 + }, + { + "epoch": 1.2563451776649746, + "grad_norm": 1.2968716621398926, + "learning_rate": 2.0222281665101145e-05, + "loss": 0.3006, + "step": 495 + }, + { + "epoch": 1.2690355329949239, + "grad_norm": 1.2719231843948364, + "learning_rate": 2.0024914791693707e-05, + "loss": 0.3624, + "step": 500 + }, + { + "epoch": 1.281725888324873, + "grad_norm": 1.4292354583740234, + "learning_rate": 1.982656305567942e-05, + "loss": 0.3111, + "step": 505 + }, + { + "epoch": 1.2944162436548223, + "grad_norm": 1.1292978525161743, + "learning_rate": 1.962726533318175e-05, + "loss": 0.2908, + "step": 510 + }, + { + "epoch": 1.3071065989847717, + "grad_norm": 1.3529636859893799, + "learning_rate": 1.94270606857336e-05, + "loss": 0.3253, + "step": 515 + }, + { + "epoch": 1.3197969543147208, + "grad_norm": 1.4094387292861938, + "learning_rate": 1.9225988352621445e-05, + "loss": 0.3386, + "step": 520 + }, + { + "epoch": 1.33248730964467, + "grad_norm": 1.3554987907409668, + "learning_rate": 1.9024087743194566e-05, + "loss": 0.2905, + "step": 525 + }, + { + "epoch": 1.3451776649746192, + "grad_norm": 1.2369471788406372, + "learning_rate": 1.8821398429140996e-05, + "loss": 0.3402, + "step": 530 + }, + { + "epoch": 1.3578680203045685, + "grad_norm": 1.3043594360351562, + "learning_rate": 1.8617960136731627e-05, + "loss": 0.2919, + "step": 535 + }, + { + "epoch": 1.3705583756345177, + "grad_norm": 1.4459542036056519, + "learning_rate": 1.841381273903402e-05, + "loss": 0.2951, + "step": 540 + }, + { + "epoch": 1.383248730964467, + "grad_norm": 1.2986818552017212, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.301, + "step": 545 + }, + { + "epoch": 1.3959390862944163, + "grad_norm": 1.314022421836853, + "learning_rate": 1.8003550807110744e-05, + "loss": 0.2929, + "step": 550 + }, + { + "epoch": 1.4086294416243654, + "grad_norm": 1.0836771726608276, + "learning_rate": 1.7797516682534294e-05, + "loss": 0.2882, + "step": 555 + }, + { + "epoch": 1.4213197969543148, + "grad_norm": 1.2773598432540894, + "learning_rate": 1.759093425620809e-05, + "loss": 0.2893, + "step": 560 + }, + { + "epoch": 1.434010152284264, + "grad_norm": 1.2757270336151123, + "learning_rate": 1.7383844017436998e-05, + "loss": 0.3182, + "step": 565 + }, + { + "epoch": 1.4467005076142132, + "grad_norm": 1.1826164722442627, + "learning_rate": 1.717628655505503e-05, + "loss": 0.2802, + "step": 570 + }, + { + "epoch": 1.4593908629441623, + "grad_norm": 1.241083025932312, + "learning_rate": 1.6968302549470097e-05, + "loss": 0.2937, + "step": 575 + }, + { + "epoch": 1.4720812182741116, + "grad_norm": 1.3505573272705078, + "learning_rate": 1.6759932764690834e-05, + "loss": 0.3073, + "step": 580 + }, + { + "epoch": 1.484771573604061, + "grad_norm": 1.3596383333206177, + "learning_rate": 1.6551218040336994e-05, + "loss": 0.2496, + "step": 585 + }, + { + "epoch": 1.49746192893401, + "grad_norm": 1.3478647470474243, + "learning_rate": 1.6342199283635086e-05, + "loss": 0.2467, + "step": 590 + }, + { + "epoch": 1.5101522842639594, + "grad_norm": 1.169520616531372, + "learning_rate": 1.6132917461400685e-05, + "loss": 0.2515, + "step": 595 + }, + { + "epoch": 1.5228426395939088, + "grad_norm": 1.362614631652832, + "learning_rate": 1.5923413592009145e-05, + "loss": 0.2458, + "step": 600 + }, + { + "epoch": 1.5355329949238579, + "grad_norm": 1.2234731912612915, + "learning_rate": 1.5713728737356138e-05, + "loss": 0.2287, + "step": 605 + }, + { + "epoch": 1.548223350253807, + "grad_norm": 1.1893564462661743, + "learning_rate": 1.5503903994809704e-05, + "loss": 0.26, + "step": 610 + }, + { + "epoch": 1.5609137055837563, + "grad_norm": 1.2694735527038574, + "learning_rate": 1.5293980489155335e-05, + "loss": 0.2502, + "step": 615 + }, + { + "epoch": 1.5736040609137056, + "grad_norm": 1.2593806982040405, + "learning_rate": 1.508399936453569e-05, + "loss": 0.2421, + "step": 620 + }, + { + "epoch": 1.5862944162436547, + "grad_norm": 1.421431541442871, + "learning_rate": 1.4874001776386501e-05, + "loss": 0.2485, + "step": 625 + }, + { + "epoch": 1.598984771573604, + "grad_norm": 1.3342777490615845, + "learning_rate": 1.4664028883370285e-05, + "loss": 0.2704, + "step": 630 + }, + { + "epoch": 1.6116751269035534, + "grad_norm": 1.4450843334197998, + "learning_rate": 1.4454121839309416e-05, + "loss": 0.2348, + "step": 635 + }, + { + "epoch": 1.6243654822335025, + "grad_norm": 1.3026494979858398, + "learning_rate": 1.4244321785120149e-05, + "loss": 0.2415, + "step": 640 + }, + { + "epoch": 1.6370558375634516, + "grad_norm": 1.1468092203140259, + "learning_rate": 1.4034669840749155e-05, + "loss": 0.2393, + "step": 645 + }, + { + "epoch": 1.649746192893401, + "grad_norm": 1.332000970840454, + "learning_rate": 1.3825207097114201e-05, + "loss": 0.2573, + "step": 650 + }, + { + "epoch": 1.6624365482233503, + "grad_norm": 1.1649394035339355, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.2263, + "step": 655 + }, + { + "epoch": 1.6751269035532994, + "grad_norm": 1.263023853302002, + "learning_rate": 1.3407013382264229e-05, + "loss": 0.2331, + "step": 660 + }, + { + "epoch": 1.6878172588832487, + "grad_norm": 1.1169109344482422, + "learning_rate": 1.3198364375295225e-05, + "loss": 0.2271, + "step": 665 + }, + { + "epoch": 1.700507614213198, + "grad_norm": 1.2137330770492554, + "learning_rate": 1.2990068481489631e-05, + "loss": 0.2342, + "step": 670 + }, + { + "epoch": 1.7131979695431472, + "grad_norm": 1.4052578210830688, + "learning_rate": 1.278216652598487e-05, + "loss": 0.2192, + "step": 675 + }, + { + "epoch": 1.7258883248730963, + "grad_norm": 1.2468751668930054, + "learning_rate": 1.2574699256708104e-05, + "loss": 0.21, + "step": 680 + }, + { + "epoch": 1.7385786802030458, + "grad_norm": 1.2067246437072754, + "learning_rate": 1.2367707336389761e-05, + "loss": 0.1869, + "step": 685 + }, + { + "epoch": 1.751269035532995, + "grad_norm": 1.2075904607772827, + "learning_rate": 1.2161231334593853e-05, + "loss": 0.2308, + "step": 690 + }, + { + "epoch": 1.763959390862944, + "grad_norm": 1.1924525499343872, + "learning_rate": 1.1955311719766463e-05, + "loss": 0.1945, + "step": 695 + }, + { + "epoch": 1.7766497461928934, + "grad_norm": 1.0743389129638672, + "learning_rate": 1.1749988851304141e-05, + "loss": 0.1592, + "step": 700 + }, + { + "epoch": 1.7893401015228427, + "grad_norm": 1.3566924333572388, + "learning_rate": 1.1545302971643591e-05, + "loss": 0.2055, + "step": 705 + }, + { + "epoch": 1.8020304568527918, + "grad_norm": 1.1087195873260498, + "learning_rate": 1.1341294198374342e-05, + "loss": 0.1891, + "step": 710 + }, + { + "epoch": 1.8147208121827412, + "grad_norm": 1.5293354988098145, + "learning_rate": 1.1138002516375865e-05, + "loss": 0.1951, + "step": 715 + }, + { + "epoch": 1.8274111675126905, + "grad_norm": 1.213222622871399, + "learning_rate": 1.0935467769980678e-05, + "loss": 0.18, + "step": 720 + }, + { + "epoch": 1.8401015228426396, + "grad_norm": 1.1803213357925415, + "learning_rate": 1.0733729655165056e-05, + "loss": 0.186, + "step": 725 + }, + { + "epoch": 1.8527918781725887, + "grad_norm": 1.6714361906051636, + "learning_rate": 1.0532827711768747e-05, + "loss": 0.2151, + "step": 730 + }, + { + "epoch": 1.865482233502538, + "grad_norm": 1.3207422494888306, + "learning_rate": 1.0332801315745363e-05, + "loss": 0.2139, + "step": 735 + }, + { + "epoch": 1.8781725888324874, + "grad_norm": 1.322493553161621, + "learning_rate": 1.0133689671444817e-05, + "loss": 0.1862, + "step": 740 + }, + { + "epoch": 1.8908629441624365, + "grad_norm": 1.287735939025879, + "learning_rate": 9.93553180392947e-06, + "loss": 0.1774, + "step": 745 + }, + { + "epoch": 1.9035532994923858, + "grad_norm": 1.1546549797058105, + "learning_rate": 9.738366551325343e-06, + "loss": 0.1751, + "step": 750 + }, + { + "epoch": 1.9162436548223352, + "grad_norm": 1.1232792139053345, + "learning_rate": 9.542232557210039e-06, + "loss": 0.1837, + "step": 755 + }, + { + "epoch": 1.9289340101522843, + "grad_norm": 1.1172024011611938, + "learning_rate": 9.347168263038728e-06, + "loss": 0.1611, + "step": 760 + }, + { + "epoch": 1.9416243654822334, + "grad_norm": 1.3293417692184448, + "learning_rate": 9.15321190060981e-06, + "loss": 0.1666, + "step": 765 + }, + { + "epoch": 1.9543147208121827, + "grad_norm": 1.1373034715652466, + "learning_rate": 8.960401484571612e-06, + "loss": 0.1951, + "step": 770 + }, + { + "epoch": 1.967005076142132, + "grad_norm": 1.211588740348816, + "learning_rate": 8.768774804971705e-06, + "loss": 0.1568, + "step": 775 + }, + { + "epoch": 1.9796954314720812, + "grad_norm": 1.2450007200241089, + "learning_rate": 8.57836941985017e-06, + "loss": 0.1828, + "step": 780 + }, + { + "epoch": 1.9923857868020305, + "grad_norm": 1.2583216428756714, + "learning_rate": 8.389222647878426e-06, + "loss": 0.1653, + "step": 785 + }, + { + "epoch": 2.00507614213198, + "grad_norm": 0.9901634454727173, + "learning_rate": 8.201371561044864e-06, + "loss": 0.1558, + "step": 790 + }, + { + "epoch": 2.017766497461929, + "grad_norm": 1.210463285446167, + "learning_rate": 8.014852977388965e-06, + "loss": 0.1541, + "step": 795 + }, + { + "epoch": 2.030456852791878, + "grad_norm": 1.0173143148422241, + "learning_rate": 7.82970345378503e-06, + "loss": 0.1495, + "step": 800 + }, + { + "epoch": 2.0431472081218276, + "grad_norm": 0.8294602632522583, + "learning_rate": 7.645959278777271e-06, + "loss": 0.1192, + "step": 805 + }, + { + "epoch": 2.0558375634517767, + "grad_norm": 1.1245883703231812, + "learning_rate": 7.46365646546732e-06, + "loss": 0.122, + "step": 810 + }, + { + "epoch": 2.068527918781726, + "grad_norm": 1.241137981414795, + "learning_rate": 7.282830744455896e-06, + "loss": 0.1284, + "step": 815 + }, + { + "epoch": 2.081218274111675, + "grad_norm": 1.3888517618179321, + "learning_rate": 7.103517556839661e-06, + "loss": 0.1163, + "step": 820 + }, + { + "epoch": 2.0939086294416245, + "grad_norm": 0.8490480780601501, + "learning_rate": 6.925752047265011e-06, + "loss": 0.106, + "step": 825 + }, + { + "epoch": 2.1065989847715736, + "grad_norm": 0.9379386305809021, + "learning_rate": 6.749569057039771e-06, + "loss": 0.1528, + "step": 830 + }, + { + "epoch": 2.1192893401015227, + "grad_norm": 1.0581309795379639, + "learning_rate": 6.575003117304535e-06, + "loss": 0.1099, + "step": 835 + }, + { + "epoch": 2.1319796954314723, + "grad_norm": 0.9221587777137756, + "learning_rate": 6.402088442264615e-06, + "loss": 0.1196, + "step": 840 + }, + { + "epoch": 2.1446700507614214, + "grad_norm": 1.0304913520812988, + "learning_rate": 6.230858922484289e-06, + "loss": 0.1324, + "step": 845 + }, + { + "epoch": 2.1573604060913705, + "grad_norm": 1.0631238222122192, + "learning_rate": 6.061348118244298e-06, + "loss": 0.1212, + "step": 850 + }, + { + "epoch": 2.1700507614213196, + "grad_norm": 0.90574049949646, + "learning_rate": 5.893589252964258e-06, + "loss": 0.1014, + "step": 855 + }, + { + "epoch": 2.182741116751269, + "grad_norm": 0.8504880666732788, + "learning_rate": 5.727615206690921e-06, + "loss": 0.1071, + "step": 860 + }, + { + "epoch": 2.1954314720812182, + "grad_norm": 0.9433344006538391, + "learning_rate": 5.563458509653905e-06, + "loss": 0.1249, + "step": 865 + }, + { + "epoch": 2.2081218274111674, + "grad_norm": 1.2438442707061768, + "learning_rate": 5.401151335889819e-06, + "loss": 0.1096, + "step": 870 + }, + { + "epoch": 2.220812182741117, + "grad_norm": 1.3032896518707275, + "learning_rate": 5.240725496936373e-06, + "loss": 0.1186, + "step": 875 + }, + { + "epoch": 2.233502538071066, + "grad_norm": 0.86854487657547, + "learning_rate": 5.082212435597352e-06, + "loss": 0.1204, + "step": 880 + }, + { + "epoch": 2.246192893401015, + "grad_norm": 1.0228619575500488, + "learning_rate": 4.925643219780053e-06, + "loss": 0.1122, + "step": 885 + }, + { + "epoch": 2.2588832487309647, + "grad_norm": 1.0082576274871826, + "learning_rate": 4.771048536406012e-06, + "loss": 0.1134, + "step": 890 + }, + { + "epoch": 2.271573604060914, + "grad_norm": 1.085213541984558, + "learning_rate": 4.618458685396579e-06, + "loss": 0.1207, + "step": 895 + }, + { + "epoch": 2.284263959390863, + "grad_norm": 1.3562793731689453, + "learning_rate": 4.467903573734174e-06, + "loss": 0.1095, + "step": 900 + }, + { + "epoch": 2.296954314720812, + "grad_norm": 0.9254333972930908, + "learning_rate": 4.319412709600723e-06, + "loss": 0.0927, + "step": 905 + }, + { + "epoch": 2.3096446700507616, + "grad_norm": 0.9643211364746094, + "learning_rate": 4.173015196594103e-06, + "loss": 0.1079, + "step": 910 + }, + { + "epoch": 2.3223350253807107, + "grad_norm": 0.9597625136375427, + "learning_rate": 4.028739728024023e-06, + "loss": 0.1259, + "step": 915 + }, + { + "epoch": 2.33502538071066, + "grad_norm": 1.0061312913894653, + "learning_rate": 3.886614581288187e-06, + "loss": 0.1086, + "step": 920 + }, + { + "epoch": 2.347715736040609, + "grad_norm": 0.8229241371154785, + "learning_rate": 3.746667612330109e-06, + "loss": 0.1065, + "step": 925 + }, + { + "epoch": 2.3604060913705585, + "grad_norm": 0.7584841847419739, + "learning_rate": 3.608926250179392e-06, + "loss": 0.1048, + "step": 930 + }, + { + "epoch": 2.3730964467005076, + "grad_norm": 0.8581719398498535, + "learning_rate": 3.4734174915758245e-06, + "loss": 0.1026, + "step": 935 + }, + { + "epoch": 2.3857868020304567, + "grad_norm": 0.9768736958503723, + "learning_rate": 3.340167895678059e-06, + "loss": 0.1019, + "step": 940 + }, + { + "epoch": 2.3984771573604062, + "grad_norm": 0.9733632802963257, + "learning_rate": 3.2092035788581907e-06, + "loss": 0.1076, + "step": 945 + }, + { + "epoch": 2.4111675126903553, + "grad_norm": 0.8969612121582031, + "learning_rate": 3.0805502095829987e-06, + "loss": 0.089, + "step": 950 + }, + { + "epoch": 2.4238578680203045, + "grad_norm": 0.8802261352539062, + "learning_rate": 2.954233003383089e-06, + "loss": 0.0974, + "step": 955 + }, + { + "epoch": 2.436548223350254, + "grad_norm": 0.9613264799118042, + "learning_rate": 2.830276717910692e-06, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 2.449238578680203, + "grad_norm": 1.1224033832550049, + "learning_rate": 2.7087056480873322e-06, + "loss": 0.1061, + "step": 965 + }, + { + "epoch": 2.4619289340101522, + "grad_norm": 0.8096681833267212, + "learning_rate": 2.5895436213420566e-06, + "loss": 0.0963, + "step": 970 + }, + { + "epoch": 2.4746192893401013, + "grad_norm": 0.8827739357948303, + "learning_rate": 2.4728139929414185e-06, + "loss": 0.1041, + "step": 975 + }, + { + "epoch": 2.487309644670051, + "grad_norm": 0.9586897492408752, + "learning_rate": 2.358539641411883e-06, + "loss": 0.1239, + "step": 980 + }, + { + "epoch": 2.5, + "grad_norm": 1.3292133808135986, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.1006, + "step": 985 + }, + { + "epoch": 2.512690355329949, + "grad_norm": 0.9215099215507507, + "learning_rate": 2.1374458725615147e-06, + "loss": 0.1001, + "step": 990 + }, + { + "epoch": 2.525380710659898, + "grad_norm": 0.7868054509162903, + "learning_rate": 2.0306697887089238e-06, + "loss": 0.1057, + "step": 995 + }, + { + "epoch": 2.5380710659898478, + "grad_norm": 0.9105503559112549, + "learning_rate": 1.9264356401707402e-06, + "loss": 0.097, + "step": 1000 + }, + { + "epoch": 2.550761421319797, + "grad_norm": 0.7940577268600464, + "learning_rate": 1.824763856410861e-06, + "loss": 0.0989, + "step": 1005 + }, + { + "epoch": 2.563451776649746, + "grad_norm": 0.7759144902229309, + "learning_rate": 1.7256743646802092e-06, + "loss": 0.1125, + "step": 1010 + }, + { + "epoch": 2.5761421319796955, + "grad_norm": 0.6648419499397278, + "learning_rate": 1.6291865861111354e-06, + "loss": 0.1032, + "step": 1015 + }, + { + "epoch": 2.5888324873096447, + "grad_norm": 0.9390238523483276, + "learning_rate": 1.5353194319108916e-06, + "loss": 0.0919, + "step": 1020 + }, + { + "epoch": 2.6015228426395938, + "grad_norm": 0.857590913772583, + "learning_rate": 1.4440912996551753e-06, + "loss": 0.0986, + "step": 1025 + }, + { + "epoch": 2.6142131979695433, + "grad_norm": 0.947079598903656, + "learning_rate": 1.3555200696822235e-06, + "loss": 0.1079, + "step": 1030 + }, + { + "epoch": 2.6269035532994924, + "grad_norm": 0.8943976163864136, + "learning_rate": 1.2696231015883914e-06, + "loss": 0.0892, + "step": 1035 + }, + { + "epoch": 2.6395939086294415, + "grad_norm": 0.8465593457221985, + "learning_rate": 1.186417230825695e-06, + "loss": 0.1158, + "step": 1040 + }, + { + "epoch": 2.652284263959391, + "grad_norm": 0.9021741151809692, + "learning_rate": 1.1059187654021764e-06, + "loss": 0.1043, + "step": 1045 + }, + { + "epoch": 2.66497461928934, + "grad_norm": 0.6911149621009827, + "learning_rate": 1.0281434826855647e-06, + "loss": 0.0981, + "step": 1050 + }, + { + "epoch": 2.6776649746192893, + "grad_norm": 0.821882963180542, + "learning_rate": 9.531066263109972e-07, + "loss": 0.0913, + "step": 1055 + }, + { + "epoch": 2.6903553299492384, + "grad_norm": 0.7322788238525391, + "learning_rate": 8.808229031933285e-07, + "loss": 0.0961, + "step": 1060 + }, + { + "epoch": 2.703045685279188, + "grad_norm": 0.8067730069160461, + "learning_rate": 8.113064806446286e-07, + "loss": 0.0998, + "step": 1065 + }, + { + "epoch": 2.715736040609137, + "grad_norm": 0.7199844717979431, + "learning_rate": 7.445709835974512e-07, + "loss": 0.0959, + "step": 1070 + }, + { + "epoch": 2.728426395939086, + "grad_norm": 0.9346873760223389, + "learning_rate": 6.806294919344053e-07, + "loss": 0.1053, + "step": 1075 + }, + { + "epoch": 2.7411167512690353, + "grad_norm": 0.8869054913520813, + "learning_rate": 6.194945379245498e-07, + "loss": 0.1082, + "step": 1080 + }, + { + "epoch": 2.753807106598985, + "grad_norm": 0.8591873049736023, + "learning_rate": 5.611781037671176e-07, + "loss": 0.1031, + "step": 1085 + }, + { + "epoch": 2.766497461928934, + "grad_norm": 0.8201610445976257, + "learning_rate": 5.056916192430622e-07, + "loss": 0.1037, + "step": 1090 + }, + { + "epoch": 2.779187817258883, + "grad_norm": 0.770133376121521, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.09, + "step": 1095 + }, + { + "epoch": 2.7918781725888326, + "grad_norm": 0.7147127389907837, + "learning_rate": 4.032514427950307e-07, + "loss": 0.0912, + "step": 1100 + }, + { + "epoch": 2.8045685279187818, + "grad_norm": 0.7039284110069275, + "learning_rate": 3.56317828723795e-07, + "loss": 0.0824, + "step": 1105 + }, + { + "epoch": 2.817258883248731, + "grad_norm": 0.9477445483207703, + "learning_rate": 3.1225431605624134e-07, + "loss": 0.0911, + "step": 1110 + }, + { + "epoch": 2.8299492385786804, + "grad_norm": 0.9587734341621399, + "learning_rate": 2.710695410593994e-07, + "loss": 0.0969, + "step": 1115 + }, + { + "epoch": 2.8426395939086295, + "grad_norm": 0.8074435591697693, + "learning_rate": 2.3277157577957398e-07, + "loss": 0.0953, + "step": 1120 + }, + { + "epoch": 2.8553299492385786, + "grad_norm": 0.7489620447158813, + "learning_rate": 1.9736792646024847e-07, + "loss": 0.0926, + "step": 1125 + }, + { + "epoch": 2.868020304568528, + "grad_norm": 0.7105598449707031, + "learning_rate": 1.6486553207090194e-07, + "loss": 0.0979, + "step": 1130 + }, + { + "epoch": 2.8807106598984773, + "grad_norm": 0.8042716383934021, + "learning_rate": 1.3527076294698848e-07, + "loss": 0.0883, + "step": 1135 + }, + { + "epoch": 2.8934010152284264, + "grad_norm": 0.9010692834854126, + "learning_rate": 1.0858941954139112e-07, + "loss": 0.0854, + "step": 1140 + }, + { + "epoch": 2.9060913705583755, + "grad_norm": 0.8455910086631775, + "learning_rate": 8.482673128753948e-08, + "loss": 0.082, + "step": 1145 + }, + { + "epoch": 2.9187817258883246, + "grad_norm": 0.6845624446868896, + "learning_rate": 6.398735557448299e-08, + "loss": 0.0836, + "step": 1150 + }, + { + "epoch": 2.931472081218274, + "grad_norm": 0.7735778093338013, + "learning_rate": 4.607537683404106e-08, + "loss": 0.0904, + "step": 1155 + }, + { + "epoch": 2.9441624365482233, + "grad_norm": 0.9506065249443054, + "learning_rate": 3.1094305740291816e-08, + "loss": 0.1095, + "step": 1160 + }, + { + "epoch": 2.9568527918781724, + "grad_norm": 0.7977011799812317, + "learning_rate": 1.9047078521474137e-08, + "loss": 0.1199, + "step": 1165 + }, + { + "epoch": 2.969543147208122, + "grad_norm": 0.9989894032478333, + "learning_rate": 9.93605638451467e-09, + "loss": 0.0951, + "step": 1170 + }, + { + "epoch": 2.982233502538071, + "grad_norm": 0.8241462111473083, + "learning_rate": 3.763025052231361e-09, + "loss": 0.1023, + "step": 1175 + }, + { + "epoch": 2.99492385786802, + "grad_norm": 0.6637108325958252, + "learning_rate": 5.29194413350087e-10, + "loss": 0.0912, + "step": 1180 + }, + { + "epoch": 3.0, + "step": 1182, + "total_flos": 1.5061941455452897e+18, + "train_loss": 0.4424345256436093, + "train_runtime": 641.9077, + "train_samples_per_second": 58.878, + "train_steps_per_second": 1.841 + } + ], + "logging_steps": 5, + "max_steps": 1182, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5061941455452897e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..539010dcce1e4f5e9430a3818daabe4807fe36d6 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/14_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b0c5a083da0b57f92c1f99a35ddc89abb2d29539edbc9bd8461112b998f724 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f5026570ebedbc593e303c3b2290d69cf31eb3a0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 15_128_e3_3e-5 + results: [] +--- + + + +# 15_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa5f0778f9ec8aba73ed449fc99f489908e9d169 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj", + "k_proj", + "up_proj", + "down_proj", + "o_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5249badb198b08463d6de0563f47e0766313c741 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c519b82d8be593f7489cef78e26e04b6f49f63384e742ef7f071b10700f00a49 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c4aac6f77b3f2f9611643f1840a061033a064685 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3289249660741878e+18, + "train_loss": 0.4622165315596846, + "train_runtime": 566.114, + "train_samples": 11307, + "train_samples_per_second": 59.919, + "train_steps_per_second": 1.876 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c4aac6f77b3f2f9611643f1840a061033a064685 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3289249660741878e+18, + "train_loss": 0.4622165315596846, + "train_runtime": 566.114, + "train_samples": 11307, + "train_samples_per_second": 59.919, + "train_steps_per_second": 1.876 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ec78219ad0948ff948df63288c2425a77256ba84 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1527 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1062, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014144271570014143, + "grad_norm": 0.662504255771637, + "learning_rate": 2.222222222222222e-06, + "loss": 1.6004, + "step": 5 + }, + { + "epoch": 0.028288543140028287, + "grad_norm": 0.6674489974975586, + "learning_rate": 4.9999999999999996e-06, + "loss": 1.6672, + "step": 10 + }, + { + "epoch": 0.042432814710042434, + "grad_norm": 0.5424950122833252, + "learning_rate": 7.777777777777777e-06, + "loss": 1.691, + "step": 15 + }, + { + "epoch": 0.056577086280056574, + "grad_norm": 0.5175806283950806, + "learning_rate": 1.0555555555555555e-05, + "loss": 1.6716, + "step": 20 + }, + { + "epoch": 0.07072135785007072, + "grad_norm": 0.47757112979888916, + "learning_rate": 1.3333333333333333e-05, + "loss": 1.6354, + "step": 25 + }, + { + "epoch": 0.08486562942008487, + "grad_norm": 0.4925291836261749, + "learning_rate": 1.6111111111111115e-05, + "loss": 1.6336, + "step": 30 + }, + { + "epoch": 0.09900990099009901, + "grad_norm": 0.500893235206604, + "learning_rate": 1.888888888888889e-05, + "loss": 1.5978, + "step": 35 + }, + { + "epoch": 0.11315417256011315, + "grad_norm": 0.45501258969306946, + "learning_rate": 2.1666666666666667e-05, + "loss": 1.534, + "step": 40 + }, + { + "epoch": 0.1272984441301273, + "grad_norm": 0.49716073274612427, + "learning_rate": 2.4444444444444445e-05, + "loss": 1.5358, + "step": 45 + }, + { + "epoch": 0.14144271570014144, + "grad_norm": 0.5642329454421997, + "learning_rate": 2.7222222222222223e-05, + "loss": 1.5159, + "step": 50 + }, + { + "epoch": 0.15558698727015557, + "grad_norm": 0.5461786389350891, + "learning_rate": 3e-05, + "loss": 1.3607, + "step": 55 + }, + { + "epoch": 0.16973125884016974, + "grad_norm": 0.4951108396053314, + "learning_rate": 2.9998178743291113e-05, + "loss": 1.4223, + "step": 60 + }, + { + "epoch": 0.18387553041018387, + "grad_norm": 0.5914878249168396, + "learning_rate": 2.9992715415427915e-05, + "loss": 1.4058, + "step": 65 + }, + { + "epoch": 0.19801980198019803, + "grad_norm": 0.6083022952079773, + "learning_rate": 2.998361134309341e-05, + "loss": 1.3916, + "step": 70 + }, + { + "epoch": 0.21216407355021216, + "grad_norm": 0.5392953157424927, + "learning_rate": 2.997086873706798e-05, + "loss": 1.3217, + "step": 75 + }, + { + "epoch": 0.2263083451202263, + "grad_norm": 0.6700633764266968, + "learning_rate": 2.995449069169251e-05, + "loss": 1.3141, + "step": 80 + }, + { + "epoch": 0.24045261669024046, + "grad_norm": 0.7206193804740906, + "learning_rate": 2.9934481184117005e-05, + "loss": 1.2162, + "step": 85 + }, + { + "epoch": 0.2545968882602546, + "grad_norm": 0.6888675689697266, + "learning_rate": 2.9910845073334793e-05, + "loss": 1.2758, + "step": 90 + }, + { + "epoch": 0.26874115983026875, + "grad_norm": 0.6075741648674011, + "learning_rate": 2.9883588099002583e-05, + "loss": 1.2742, + "step": 95 + }, + { + "epoch": 0.2828854314002829, + "grad_norm": 0.6921921968460083, + "learning_rate": 2.9852716880046687e-05, + "loss": 1.3188, + "step": 100 + }, + { + "epoch": 0.297029702970297, + "grad_norm": 0.6580811142921448, + "learning_rate": 2.981823891305572e-05, + "loss": 1.2054, + "step": 105 + }, + { + "epoch": 0.31117397454031115, + "grad_norm": 0.694297194480896, + "learning_rate": 2.9780162570460182e-05, + "loss": 1.2048, + "step": 110 + }, + { + "epoch": 0.32531824611032534, + "grad_norm": 0.786004364490509, + "learning_rate": 2.9738497098499325e-05, + "loss": 1.1074, + "step": 115 + }, + { + "epoch": 0.33946251768033947, + "grad_norm": 0.7890642881393433, + "learning_rate": 2.969325261497586e-05, + "loss": 1.0792, + "step": 120 + }, + { + "epoch": 0.3536067892503536, + "grad_norm": 0.724420428276062, + "learning_rate": 2.9644440106799003e-05, + "loss": 1.1348, + "step": 125 + }, + { + "epoch": 0.36775106082036774, + "grad_norm": 0.9557508230209351, + "learning_rate": 2.9592071427316492e-05, + "loss": 1.0947, + "step": 130 + }, + { + "epoch": 0.38189533239038187, + "grad_norm": 0.8956389427185059, + "learning_rate": 2.9536159293436172e-05, + "loss": 1.1015, + "step": 135 + }, + { + "epoch": 0.39603960396039606, + "grad_norm": 0.8321068286895752, + "learning_rate": 2.9476717282537897e-05, + "loss": 1.0449, + "step": 140 + }, + { + "epoch": 0.4101838755304102, + "grad_norm": 0.902127742767334, + "learning_rate": 2.9413759829176497e-05, + "loss": 1.0601, + "step": 145 + }, + { + "epoch": 0.4243281471004243, + "grad_norm": 0.8920028209686279, + "learning_rate": 2.934730222157653e-05, + "loss": 0.9538, + "step": 150 + }, + { + "epoch": 0.43847241867043846, + "grad_norm": 1.0076197385787964, + "learning_rate": 2.927736059791984e-05, + "loss": 0.9779, + "step": 155 + }, + { + "epoch": 0.4526166902404526, + "grad_norm": 0.9957407712936401, + "learning_rate": 2.9203951942426587e-05, + "loss": 1.0008, + "step": 160 + }, + { + "epoch": 0.4667609618104668, + "grad_norm": 0.9646873474121094, + "learning_rate": 2.9127094081230952e-05, + "loss": 0.9698, + "step": 165 + }, + { + "epoch": 0.4809052333804809, + "grad_norm": 1.0112730264663696, + "learning_rate": 2.9046805678052312e-05, + "loss": 0.9091, + "step": 170 + }, + { + "epoch": 0.49504950495049505, + "grad_norm": 0.9719317555427551, + "learning_rate": 2.8963106229663064e-05, + "loss": 0.9852, + "step": 175 + }, + { + "epoch": 0.5091937765205092, + "grad_norm": 1.0120776891708374, + "learning_rate": 2.8876016061154125e-05, + "loss": 0.8922, + "step": 180 + }, + { + "epoch": 0.5233380480905233, + "grad_norm": 1.0641988515853882, + "learning_rate": 2.8785556320999308e-05, + "loss": 0.8768, + "step": 185 + }, + { + "epoch": 0.5374823196605375, + "grad_norm": 1.1291581392288208, + "learning_rate": 2.8691748975919784e-05, + "loss": 0.8397, + "step": 190 + }, + { + "epoch": 0.5516265912305516, + "grad_norm": 1.0794135332107544, + "learning_rate": 2.8594616805549752e-05, + "loss": 0.8204, + "step": 195 + }, + { + "epoch": 0.5657708628005658, + "grad_norm": 1.1569795608520508, + "learning_rate": 2.84941833969048e-05, + "loss": 0.7963, + "step": 200 + }, + { + "epoch": 0.57991513437058, + "grad_norm": 1.239943265914917, + "learning_rate": 2.8390473138654175e-05, + "loss": 0.8375, + "step": 205 + }, + { + "epoch": 0.594059405940594, + "grad_norm": 1.1205483675003052, + "learning_rate": 2.828351121519835e-05, + "loss": 0.776, + "step": 210 + }, + { + "epoch": 0.6082036775106082, + "grad_norm": 1.0735560655593872, + "learning_rate": 2.817332360055343e-05, + "loss": 0.7651, + "step": 215 + }, + { + "epoch": 0.6223479490806223, + "grad_norm": 1.2014222145080566, + "learning_rate": 2.8059937052043724e-05, + "loss": 0.7951, + "step": 220 + }, + { + "epoch": 0.6364922206506365, + "grad_norm": 1.1695849895477295, + "learning_rate": 2.7943379103804196e-05, + "loss": 0.756, + "step": 225 + }, + { + "epoch": 0.6506364922206507, + "grad_norm": 1.1137144565582275, + "learning_rate": 2.7823678060094198e-05, + "loss": 0.7206, + "step": 230 + }, + { + "epoch": 0.6647807637906648, + "grad_norm": 1.123942494392395, + "learning_rate": 2.770086298842426e-05, + "loss": 0.6955, + "step": 235 + }, + { + "epoch": 0.6789250353606789, + "grad_norm": 1.1222422122955322, + "learning_rate": 2.7574963712497486e-05, + "loss": 0.7958, + "step": 240 + }, + { + "epoch": 0.693069306930693, + "grad_norm": 1.2997254133224487, + "learning_rate": 2.7446010804967314e-05, + "loss": 0.7314, + "step": 245 + }, + { + "epoch": 0.7072135785007072, + "grad_norm": 1.2394288778305054, + "learning_rate": 2.7314035580013487e-05, + "loss": 0.7193, + "step": 250 + }, + { + "epoch": 0.7213578500707214, + "grad_norm": 1.1304413080215454, + "learning_rate": 2.717907008573785e-05, + "loss": 0.7465, + "step": 255 + }, + { + "epoch": 0.7355021216407355, + "grad_norm": 1.3065577745437622, + "learning_rate": 2.7041147096381988e-05, + "loss": 0.6582, + "step": 260 + }, + { + "epoch": 0.7496463932107497, + "grad_norm": 1.3507620096206665, + "learning_rate": 2.6900300104368527e-05, + "loss": 0.683, + "step": 265 + }, + { + "epoch": 0.7637906647807637, + "grad_norm": 1.197556734085083, + "learning_rate": 2.6756563312168022e-05, + "loss": 0.6804, + "step": 270 + }, + { + "epoch": 0.7779349363507779, + "grad_norm": 1.303550124168396, + "learning_rate": 2.6609971623993412e-05, + "loss": 0.5796, + "step": 275 + }, + { + "epoch": 0.7920792079207921, + "grad_norm": 1.2295936346054077, + "learning_rate": 2.6460560637324113e-05, + "loss": 0.5791, + "step": 280 + }, + { + "epoch": 0.8062234794908062, + "grad_norm": 1.2018368244171143, + "learning_rate": 2.6308366634261697e-05, + "loss": 0.591, + "step": 285 + }, + { + "epoch": 0.8203677510608204, + "grad_norm": 1.2019376754760742, + "learning_rate": 2.6153426572719393e-05, + "loss": 0.6465, + "step": 290 + }, + { + "epoch": 0.8345120226308345, + "grad_norm": 1.1662421226501465, + "learning_rate": 2.5995778077447393e-05, + "loss": 0.5778, + "step": 295 + }, + { + "epoch": 0.8486562942008486, + "grad_norm": 1.3374278545379639, + "learning_rate": 2.5835459430896334e-05, + "loss": 0.5926, + "step": 300 + }, + { + "epoch": 0.8628005657708628, + "grad_norm": 1.222502589225769, + "learning_rate": 2.5672509563920953e-05, + "loss": 0.6085, + "step": 305 + }, + { + "epoch": 0.8769448373408769, + "grad_norm": 1.3008819818496704, + "learning_rate": 2.5506968046326374e-05, + "loss": 0.5836, + "step": 310 + }, + { + "epoch": 0.8910891089108911, + "grad_norm": 1.10532546043396, + "learning_rate": 2.5338875077259207e-05, + "loss": 0.6051, + "step": 315 + }, + { + "epoch": 0.9052333804809052, + "grad_norm": 1.1586799621582031, + "learning_rate": 2.5168271475445795e-05, + "loss": 0.581, + "step": 320 + }, + { + "epoch": 0.9193776520509194, + "grad_norm": 1.3252677917480469, + "learning_rate": 2.499519866928006e-05, + "loss": 0.5576, + "step": 325 + }, + { + "epoch": 0.9335219236209336, + "grad_norm": 1.299154281616211, + "learning_rate": 2.4819698686763245e-05, + "loss": 0.5762, + "step": 330 + }, + { + "epoch": 0.9476661951909476, + "grad_norm": 1.2005584239959717, + "learning_rate": 2.464181414529809e-05, + "loss": 0.5808, + "step": 335 + }, + { + "epoch": 0.9618104667609618, + "grad_norm": 1.1901746988296509, + "learning_rate": 2.4461588241339877e-05, + "loss": 0.5452, + "step": 340 + }, + { + "epoch": 0.9759547383309759, + "grad_norm": 1.2298418283462524, + "learning_rate": 2.4279064739906824e-05, + "loss": 0.515, + "step": 345 + }, + { + "epoch": 0.9900990099009901, + "grad_norm": 1.3288127183914185, + "learning_rate": 2.4094287963952468e-05, + "loss": 0.5713, + "step": 350 + }, + { + "epoch": 1.002828854314003, + "grad_norm": 1.2304744720458984, + "learning_rate": 2.3907302783602522e-05, + "loss": 0.5068, + "step": 355 + }, + { + "epoch": 1.016973125884017, + "grad_norm": 1.1948823928833008, + "learning_rate": 2.3718154605258885e-05, + "loss": 0.4538, + "step": 360 + }, + { + "epoch": 1.031117397454031, + "grad_norm": 1.315022349357605, + "learning_rate": 2.3526889360573387e-05, + "loss": 0.4532, + "step": 365 + }, + { + "epoch": 1.0452616690240453, + "grad_norm": 1.5762590169906616, + "learning_rate": 2.3333553495294033e-05, + "loss": 0.4426, + "step": 370 + }, + { + "epoch": 1.0594059405940595, + "grad_norm": 1.4747891426086426, + "learning_rate": 2.3138193957986392e-05, + "loss": 0.4487, + "step": 375 + }, + { + "epoch": 1.0735502121640736, + "grad_norm": 1.4535807371139526, + "learning_rate": 2.2940858188632853e-05, + "loss": 0.45, + "step": 380 + }, + { + "epoch": 1.0876944837340876, + "grad_norm": 1.2648611068725586, + "learning_rate": 2.27415941071126e-05, + "loss": 0.4016, + "step": 385 + }, + { + "epoch": 1.1018387553041018, + "grad_norm": 1.4532794952392578, + "learning_rate": 2.2540450101565002e-05, + "loss": 0.4416, + "step": 390 + }, + { + "epoch": 1.115983026874116, + "grad_norm": 1.2763341665267944, + "learning_rate": 2.233747501663934e-05, + "loss": 0.4221, + "step": 395 + }, + { + "epoch": 1.1301272984441302, + "grad_norm": 1.4798238277435303, + "learning_rate": 2.213271814163363e-05, + "loss": 0.4516, + "step": 400 + }, + { + "epoch": 1.1442715700141444, + "grad_norm": 1.2529631853103638, + "learning_rate": 2.192622919852551e-05, + "loss": 0.3897, + "step": 405 + }, + { + "epoch": 1.1584158415841583, + "grad_norm": 1.325947880744934, + "learning_rate": 2.171805832989804e-05, + "loss": 0.3667, + "step": 410 + }, + { + "epoch": 1.1725601131541725, + "grad_norm": 1.2170391082763672, + "learning_rate": 2.1508256086763372e-05, + "loss": 0.4396, + "step": 415 + }, + { + "epoch": 1.1867043847241867, + "grad_norm": 1.295289158821106, + "learning_rate": 2.1296873416287216e-05, + "loss": 0.4207, + "step": 420 + }, + { + "epoch": 1.200848656294201, + "grad_norm": 1.321271300315857, + "learning_rate": 2.108396164941713e-05, + "loss": 0.4256, + "step": 425 + }, + { + "epoch": 1.214992927864215, + "grad_norm": 1.395585060119629, + "learning_rate": 2.0869572488417626e-05, + "loss": 0.363, + "step": 430 + }, + { + "epoch": 1.229137199434229, + "grad_norm": 1.390397310256958, + "learning_rate": 2.0653757994315078e-05, + "loss": 0.3648, + "step": 435 + }, + { + "epoch": 1.2432814710042432, + "grad_norm": 1.3710654973983765, + "learning_rate": 2.0436570574255526e-05, + "loss": 0.3292, + "step": 440 + }, + { + "epoch": 1.2574257425742574, + "grad_norm": 1.2898603677749634, + "learning_rate": 2.0218062968778407e-05, + "loss": 0.3697, + "step": 445 + }, + { + "epoch": 1.2715700141442716, + "grad_norm": 1.3578029870986938, + "learning_rate": 1.999828823900939e-05, + "loss": 0.3735, + "step": 450 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 1.4044346809387207, + "learning_rate": 1.977729975377527e-05, + "loss": 0.3629, + "step": 455 + }, + { + "epoch": 1.2998585572842998, + "grad_norm": 1.2618662118911743, + "learning_rate": 1.9555151176644223e-05, + "loss": 0.3501, + "step": 460 + }, + { + "epoch": 1.314002828854314, + "grad_norm": 1.4585261344909668, + "learning_rate": 1.933189645289445e-05, + "loss": 0.3483, + "step": 465 + }, + { + "epoch": 1.3281471004243282, + "grad_norm": 1.4348485469818115, + "learning_rate": 1.91075897964144e-05, + "loss": 0.3623, + "step": 470 + }, + { + "epoch": 1.3422913719943423, + "grad_norm": 1.3839032649993896, + "learning_rate": 1.888228567653781e-05, + "loss": 0.3288, + "step": 475 + }, + { + "epoch": 1.3564356435643563, + "grad_norm": 1.4295552968978882, + "learning_rate": 1.8656038804816662e-05, + "loss": 0.342, + "step": 480 + }, + { + "epoch": 1.3705799151343705, + "grad_norm": 1.2766201496124268, + "learning_rate": 1.8428904121735345e-05, + "loss": 0.3513, + "step": 485 + }, + { + "epoch": 1.3847241867043847, + "grad_norm": 1.7437549829483032, + "learning_rate": 1.8200936783369252e-05, + "loss": 0.2925, + "step": 490 + }, + { + "epoch": 1.3988684582743989, + "grad_norm": 1.2318593263626099, + "learning_rate": 1.7972192147990966e-05, + "loss": 0.303, + "step": 495 + }, + { + "epoch": 1.413012729844413, + "grad_norm": 1.726684331893921, + "learning_rate": 1.7742725762627395e-05, + "loss": 0.3055, + "step": 500 + }, + { + "epoch": 1.427157001414427, + "grad_norm": 1.3405637741088867, + "learning_rate": 1.7512593349571046e-05, + "loss": 0.2989, + "step": 505 + }, + { + "epoch": 1.4413012729844414, + "grad_norm": 1.3660539388656616, + "learning_rate": 1.7281850792848752e-05, + "loss": 0.304, + "step": 510 + }, + { + "epoch": 1.4554455445544554, + "grad_norm": 1.2074600458145142, + "learning_rate": 1.7050554124651103e-05, + "loss": 0.2777, + "step": 515 + }, + { + "epoch": 1.4695898161244696, + "grad_norm": 1.2306835651397705, + "learning_rate": 1.6818759511725922e-05, + "loss": 0.2906, + "step": 520 + }, + { + "epoch": 1.4837340876944838, + "grad_norm": 1.1288750171661377, + "learning_rate": 1.658652324173907e-05, + "loss": 0.2643, + "step": 525 + }, + { + "epoch": 1.4978783592644977, + "grad_norm": 1.3785244226455688, + "learning_rate": 1.6353901709605838e-05, + "loss": 0.2356, + "step": 530 + }, + { + "epoch": 1.5120226308345122, + "grad_norm": 1.2890878915786743, + "learning_rate": 1.6120951403796367e-05, + "loss": 0.2665, + "step": 535 + }, + { + "epoch": 1.5261669024045261, + "grad_norm": 1.5150102376937866, + "learning_rate": 1.5887728892618292e-05, + "loss": 0.2453, + "step": 540 + }, + { + "epoch": 1.5403111739745403, + "grad_norm": 1.2405489683151245, + "learning_rate": 1.5654290810480043e-05, + "loss": 0.2615, + "step": 545 + }, + { + "epoch": 1.5544554455445545, + "grad_norm": 1.3498972654342651, + "learning_rate": 1.5420693844138036e-05, + "loss": 0.2729, + "step": 550 + }, + { + "epoch": 1.5685997171145685, + "grad_norm": 1.3508979082107544, + "learning_rate": 1.5186994718931227e-05, + "loss": 0.266, + "step": 555 + }, + { + "epoch": 1.5827439886845829, + "grad_norm": 1.4381049871444702, + "learning_rate": 1.4953250185006236e-05, + "loss": 0.2353, + "step": 560 + }, + { + "epoch": 1.5968882602545968, + "grad_norm": 1.3193823099136353, + "learning_rate": 1.471951700353647e-05, + "loss": 0.263, + "step": 565 + }, + { + "epoch": 1.611032531824611, + "grad_norm": 1.447575569152832, + "learning_rate": 1.4485851932938575e-05, + "loss": 0.2698, + "step": 570 + }, + { + "epoch": 1.6251768033946252, + "grad_norm": 1.6517528295516968, + "learning_rate": 1.4252311715089542e-05, + "loss": 0.2711, + "step": 575 + }, + { + "epoch": 1.6393210749646392, + "grad_norm": 1.2753918170928955, + "learning_rate": 1.4018953061547854e-05, + "loss": 0.2634, + "step": 580 + }, + { + "epoch": 1.6534653465346536, + "grad_norm": 1.2248281240463257, + "learning_rate": 1.3785832639781951e-05, + "loss": 0.2334, + "step": 585 + }, + { + "epoch": 1.6676096181046676, + "grad_norm": 1.2293918132781982, + "learning_rate": 1.355300705940945e-05, + "loss": 0.2401, + "step": 590 + }, + { + "epoch": 1.6817538896746818, + "grad_norm": 1.1760867834091187, + "learning_rate": 1.3320532858450382e-05, + "loss": 0.2309, + "step": 595 + }, + { + "epoch": 1.695898161244696, + "grad_norm": 1.1836364269256592, + "learning_rate": 1.3088466489597838e-05, + "loss": 0.2465, + "step": 600 + }, + { + "epoch": 1.71004243281471, + "grad_norm": 1.376364827156067, + "learning_rate": 1.2856864306509303e-05, + "loss": 0.2504, + "step": 605 + }, + { + "epoch": 1.7241867043847243, + "grad_norm": 1.5037505626678467, + "learning_rate": 1.2625782550122076e-05, + "loss": 0.2191, + "step": 610 + }, + { + "epoch": 1.7383309759547383, + "grad_norm": 1.2329646348953247, + "learning_rate": 1.2395277334996045e-05, + "loss": 0.2203, + "step": 615 + }, + { + "epoch": 1.7524752475247525, + "grad_norm": 1.3618288040161133, + "learning_rate": 1.2165404635687139e-05, + "loss": 0.2178, + "step": 620 + }, + { + "epoch": 1.7666195190947667, + "grad_norm": 1.4239367246627808, + "learning_rate": 1.1936220273154797e-05, + "loss": 0.2369, + "step": 625 + }, + { + "epoch": 1.7807637906647806, + "grad_norm": 1.1292532682418823, + "learning_rate": 1.1707779901206733e-05, + "loss": 0.2163, + "step": 630 + }, + { + "epoch": 1.794908062234795, + "grad_norm": 1.2973660230636597, + "learning_rate": 1.1480138992984276e-05, + "loss": 0.2036, + "step": 635 + }, + { + "epoch": 1.809052333804809, + "grad_norm": 1.3260688781738281, + "learning_rate": 1.1253352827491594e-05, + "loss": 0.1844, + "step": 640 + }, + { + "epoch": 1.8231966053748232, + "grad_norm": 1.065302848815918, + "learning_rate": 1.102747647617209e-05, + "loss": 0.1922, + "step": 645 + }, + { + "epoch": 1.8373408769448374, + "grad_norm": 1.837449550628662, + "learning_rate": 1.080256478953512e-05, + "loss": 0.2314, + "step": 650 + }, + { + "epoch": 1.8514851485148514, + "grad_norm": 1.3555163145065308, + "learning_rate": 1.0578672383836437e-05, + "loss": 0.1632, + "step": 655 + }, + { + "epoch": 1.8656294200848658, + "grad_norm": 1.2571073770523071, + "learning_rate": 1.0355853627815506e-05, + "loss": 0.2083, + "step": 660 + }, + { + "epoch": 1.8797736916548797, + "grad_norm": 1.3556491136550903, + "learning_rate": 1.0134162629492895e-05, + "loss": 0.205, + "step": 665 + }, + { + "epoch": 1.893917963224894, + "grad_norm": 1.448289394378662, + "learning_rate": 9.913653223031e-06, + "loss": 0.2266, + "step": 670 + }, + { + "epoch": 1.908062234794908, + "grad_norm": 1.0873290300369263, + "learning_rate": 9.69437895566128e-06, + "loss": 0.1884, + "step": 675 + }, + { + "epoch": 1.922206506364922, + "grad_norm": 1.3184982538223267, + "learning_rate": 9.476393074681134e-06, + "loss": 0.2001, + "step": 680 + }, + { + "epoch": 1.9363507779349365, + "grad_norm": 1.1982475519180298, + "learning_rate": 9.259748514523654e-06, + "loss": 0.1489, + "step": 685 + }, + { + "epoch": 1.9504950495049505, + "grad_norm": 1.1731574535369873, + "learning_rate": 9.044497883903326e-06, + "loss": 0.1784, + "step": 690 + }, + { + "epoch": 1.9646393210749646, + "grad_norm": 1.113274097442627, + "learning_rate": 8.83069345304083e-06, + "loss": 0.1823, + "step": 695 + }, + { + "epoch": 1.9787835926449788, + "grad_norm": 1.2763826847076416, + "learning_rate": 8.618387140970047e-06, + "loss": 0.213, + "step": 700 + }, + { + "epoch": 1.9929278642149928, + "grad_norm": 1.400197148323059, + "learning_rate": 8.407630502930325e-06, + "loss": 0.1663, + "step": 705 + }, + { + "epoch": 2.005657708628006, + "grad_norm": 1.1463834047317505, + "learning_rate": 8.198474717847146e-06, + "loss": 0.1578, + "step": 710 + }, + { + "epoch": 2.01980198019802, + "grad_norm": 1.283757209777832, + "learning_rate": 7.99097057590407e-06, + "loss": 0.1469, + "step": 715 + }, + { + "epoch": 2.033946251768034, + "grad_norm": 1.1335558891296387, + "learning_rate": 7.785168466209188e-06, + "loss": 0.1411, + "step": 720 + }, + { + "epoch": 2.048090523338048, + "grad_norm": 1.1043405532836914, + "learning_rate": 7.581118364558889e-06, + "loss": 0.1395, + "step": 725 + }, + { + "epoch": 2.062234794908062, + "grad_norm": 1.1257781982421875, + "learning_rate": 7.378869821302062e-06, + "loss": 0.1282, + "step": 730 + }, + { + "epoch": 2.0763790664780766, + "grad_norm": 1.278018832206726, + "learning_rate": 7.17847194930753e-06, + "loss": 0.1369, + "step": 735 + }, + { + "epoch": 2.0905233380480905, + "grad_norm": 1.02983820438385, + "learning_rate": 6.9799734120378105e-06, + "loss": 0.1289, + "step": 740 + }, + { + "epoch": 2.1046676096181045, + "grad_norm": 1.1169861555099487, + "learning_rate": 6.783422411731932e-06, + "loss": 0.1363, + "step": 745 + }, + { + "epoch": 2.118811881188119, + "grad_norm": 1.1570967435836792, + "learning_rate": 6.58886667770028e-06, + "loss": 0.1435, + "step": 750 + }, + { + "epoch": 2.132956152758133, + "grad_norm": 0.9197884202003479, + "learning_rate": 6.3963534547343126e-06, + "loss": 0.1211, + "step": 755 + }, + { + "epoch": 2.1471004243281473, + "grad_norm": 1.0333482027053833, + "learning_rate": 6.205929491633869e-06, + "loss": 0.1346, + "step": 760 + }, + { + "epoch": 2.1612446958981613, + "grad_norm": 1.232978105545044, + "learning_rate": 6.017641029854996e-06, + "loss": 0.1227, + "step": 765 + }, + { + "epoch": 2.1753889674681752, + "grad_norm": 1.1105915307998657, + "learning_rate": 5.831533792280926e-06, + "loss": 0.1213, + "step": 770 + }, + { + "epoch": 2.1895332390381896, + "grad_norm": 1.0993432998657227, + "learning_rate": 5.647652972118998e-06, + "loss": 0.125, + "step": 775 + }, + { + "epoch": 2.2036775106082036, + "grad_norm": 0.9111847877502441, + "learning_rate": 5.46604322192618e-06, + "loss": 0.1255, + "step": 780 + }, + { + "epoch": 2.217821782178218, + "grad_norm": 1.2181860208511353, + "learning_rate": 5.286748642765945e-06, + "loss": 0.1343, + "step": 785 + }, + { + "epoch": 2.231966053748232, + "grad_norm": 0.9866435527801514, + "learning_rate": 5.109812773498968e-06, + "loss": 0.1265, + "step": 790 + }, + { + "epoch": 2.246110325318246, + "grad_norm": 1.0655810832977295, + "learning_rate": 4.935278580210451e-06, + "loss": 0.1179, + "step": 795 + }, + { + "epoch": 2.2602545968882604, + "grad_norm": 0.8748611211776733, + "learning_rate": 4.763188445776447e-06, + "loss": 0.1227, + "step": 800 + }, + { + "epoch": 2.2743988684582743, + "grad_norm": 0.7836571335792542, + "learning_rate": 4.593584159571875e-06, + "loss": 0.1175, + "step": 805 + }, + { + "epoch": 2.2885431400282887, + "grad_norm": 0.8583051562309265, + "learning_rate": 4.426506907322624e-06, + "loss": 0.0997, + "step": 810 + }, + { + "epoch": 2.3026874115983027, + "grad_norm": 0.926509439945221, + "learning_rate": 4.261997261104223e-06, + "loss": 0.1161, + "step": 815 + }, + { + "epoch": 2.3168316831683167, + "grad_norm": 0.9428642392158508, + "learning_rate": 4.100095169489597e-06, + "loss": 0.1243, + "step": 820 + }, + { + "epoch": 2.330975954738331, + "grad_norm": 0.9933550357818604, + "learning_rate": 3.940839947848141e-06, + "loss": 0.1117, + "step": 825 + }, + { + "epoch": 2.345120226308345, + "grad_norm": 1.0230233669281006, + "learning_rate": 3.7842702687986374e-06, + "loss": 0.1241, + "step": 830 + }, + { + "epoch": 2.3592644978783595, + "grad_norm": 1.0956615209579468, + "learning_rate": 3.630424152818203e-06, + "loss": 0.1188, + "step": 835 + }, + { + "epoch": 2.3734087694483734, + "grad_norm": 0.9940826296806335, + "learning_rate": 3.479338959009625e-06, + "loss": 0.106, + "step": 840 + }, + { + "epoch": 2.3875530410183874, + "grad_norm": 0.908791720867157, + "learning_rate": 3.331051376029279e-06, + "loss": 0.114, + "step": 845 + }, + { + "epoch": 2.401697312588402, + "grad_norm": 1.011662244796753, + "learning_rate": 3.1855974131778696e-06, + "loss": 0.111, + "step": 850 + }, + { + "epoch": 2.4158415841584158, + "grad_norm": 0.908149003982544, + "learning_rate": 3.0430123916561677e-06, + "loss": 0.1015, + "step": 855 + }, + { + "epoch": 2.42998585572843, + "grad_norm": 0.9735711216926575, + "learning_rate": 2.90333093598776e-06, + "loss": 0.1115, + "step": 860 + }, + { + "epoch": 2.444130127298444, + "grad_norm": 0.9337850213050842, + "learning_rate": 2.7665869656110974e-06, + "loss": 0.1172, + "step": 865 + }, + { + "epoch": 2.458274398868458, + "grad_norm": 0.9912124276161194, + "learning_rate": 2.6328136866426294e-06, + "loss": 0.1128, + "step": 870 + }, + { + "epoch": 2.4724186704384725, + "grad_norm": 0.9298767447471619, + "learning_rate": 2.502043583813268e-06, + "loss": 0.1091, + "step": 875 + }, + { + "epoch": 2.4865629420084865, + "grad_norm": 0.9068766832351685, + "learning_rate": 2.3743084125799515e-06, + "loss": 0.1152, + "step": 880 + }, + { + "epoch": 2.500707213578501, + "grad_norm": 1.0035040378570557, + "learning_rate": 2.2496391914143634e-06, + "loss": 0.1148, + "step": 885 + }, + { + "epoch": 2.514851485148515, + "grad_norm": 0.7804632186889648, + "learning_rate": 2.1280661942705554e-06, + "loss": 0.1088, + "step": 890 + }, + { + "epoch": 2.528995756718529, + "grad_norm": 0.8839090466499329, + "learning_rate": 2.0096189432334194e-06, + "loss": 0.1167, + "step": 895 + }, + { + "epoch": 2.5431400282885432, + "grad_norm": 0.6913596987724304, + "learning_rate": 1.8943262013497082e-06, + "loss": 0.1017, + "step": 900 + }, + { + "epoch": 2.557284299858557, + "grad_norm": 1.0404833555221558, + "learning_rate": 1.782215965643364e-06, + "loss": 0.1047, + "step": 905 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 0.8003686666488647, + "learning_rate": 1.6733154603169177e-06, + "loss": 0.0953, + "step": 910 + }, + { + "epoch": 2.5855728429985856, + "grad_norm": 0.9547496438026428, + "learning_rate": 1.5676511301404861e-06, + "loss": 0.1065, + "step": 915 + }, + { + "epoch": 2.5997171145685996, + "grad_norm": 1.137930154800415, + "learning_rate": 1.465248634030103e-06, + "loss": 0.1021, + "step": 920 + }, + { + "epoch": 2.613861386138614, + "grad_norm": 0.826093316078186, + "learning_rate": 1.3661328388168358e-06, + "loss": 0.0979, + "step": 925 + }, + { + "epoch": 2.628005657708628, + "grad_norm": 0.9712331891059875, + "learning_rate": 1.2703278132082936e-06, + "loss": 0.1, + "step": 930 + }, + { + "epoch": 2.6421499292786423, + "grad_norm": 0.8392952084541321, + "learning_rate": 1.177856821943884e-06, + "loss": 0.1118, + "step": 935 + }, + { + "epoch": 2.6562942008486563, + "grad_norm": 0.8234127759933472, + "learning_rate": 1.0887423201453778e-06, + "loss": 0.1092, + "step": 940 + }, + { + "epoch": 2.6704384724186703, + "grad_norm": 0.8701257705688477, + "learning_rate": 1.0030059478640025e-06, + "loss": 0.0866, + "step": 945 + }, + { + "epoch": 2.6845827439886847, + "grad_norm": 0.8825739026069641, + "learning_rate": 9.206685248255248e-07, + "loss": 0.1025, + "step": 950 + }, + { + "epoch": 2.6987270155586986, + "grad_norm": 0.8649428486824036, + "learning_rate": 8.417500453744864e-07, + "loss": 0.1062, + "step": 955 + }, + { + "epoch": 2.7128712871287126, + "grad_norm": 1.0939172506332397, + "learning_rate": 7.662696736189129e-07, + "loss": 0.1073, + "step": 960 + }, + { + "epoch": 2.727015558698727, + "grad_norm": 0.7285016179084778, + "learning_rate": 6.942457387765977e-07, + "loss": 0.0968, + "step": 965 + }, + { + "epoch": 2.741159830268741, + "grad_norm": 0.7569419145584106, + "learning_rate": 6.256957307241396e-07, + "loss": 0.1036, + "step": 970 + }, + { + "epoch": 2.7553041018387554, + "grad_norm": 0.909615159034729, + "learning_rate": 5.606362957498195e-07, + "loss": 0.0951, + "step": 975 + }, + { + "epoch": 2.7694483734087694, + "grad_norm": 0.9221426248550415, + "learning_rate": 4.990832325112898e-07, + "loss": 0.1036, + "step": 980 + }, + { + "epoch": 2.783592644978784, + "grad_norm": 0.9270479083061218, + "learning_rate": 4.4105148819913564e-07, + "loss": 0.0957, + "step": 985 + }, + { + "epoch": 2.7977369165487977, + "grad_norm": 0.7594636678695679, + "learning_rate": 3.865551549071772e-07, + "loss": 0.1034, + "step": 990 + }, + { + "epoch": 2.8118811881188117, + "grad_norm": 0.8702991604804993, + "learning_rate": 3.3560746621043193e-07, + "loss": 0.0923, + "step": 995 + }, + { + "epoch": 2.826025459688826, + "grad_norm": 0.7041858434677124, + "learning_rate": 2.8822079395154357e-07, + "loss": 0.0959, + "step": 1000 + }, + { + "epoch": 2.84016973125884, + "grad_norm": 0.7188239097595215, + "learning_rate": 2.4440664523648017e-07, + "loss": 0.0744, + "step": 1005 + }, + { + "epoch": 2.854314002828854, + "grad_norm": 0.8437775373458862, + "learning_rate": 2.041756596402161e-07, + "loss": 0.097, + "step": 1010 + }, + { + "epoch": 2.8684582743988685, + "grad_norm": 0.6482008099555969, + "learning_rate": 1.6753760662307217e-07, + "loss": 0.0933, + "step": 1015 + }, + { + "epoch": 2.882602545968883, + "grad_norm": 0.6849322319030762, + "learning_rate": 1.3450138315836303e-07, + "loss": 0.0926, + "step": 1020 + }, + { + "epoch": 2.896746817538897, + "grad_norm": 0.8264719247817993, + "learning_rate": 1.050750115719057e-07, + "loss": 0.0891, + "step": 1025 + }, + { + "epoch": 2.910891089108911, + "grad_norm": 0.7517098784446716, + "learning_rate": 7.926563759392436e-08, + "loss": 0.0825, + "step": 1030 + }, + { + "epoch": 2.9250353606789252, + "grad_norm": 0.8773298859596252, + "learning_rate": 5.7079528623816824e-08, + "loss": 0.0966, + "step": 1035 + }, + { + "epoch": 2.939179632248939, + "grad_norm": 0.7876609563827515, + "learning_rate": 3.85220722082269e-08, + "loss": 0.1101, + "step": 1040 + }, + { + "epoch": 2.953323903818953, + "grad_norm": 0.8009656667709351, + "learning_rate": 2.3597774732750932e-08, + "loss": 0.1124, + "step": 1045 + }, + { + "epoch": 2.9674681753889676, + "grad_norm": 0.8071843981742859, + "learning_rate": 1.2310260327646483e-08, + "loss": 0.0831, + "step": 1050 + }, + { + "epoch": 2.9816124469589815, + "grad_norm": 0.7298785448074341, + "learning_rate": 4.662269987756318e-09, + "loss": 0.0946, + "step": 1055 + }, + { + "epoch": 2.9957567185289955, + "grad_norm": 0.7874497771263123, + "learning_rate": 6.556609069091613e-10, + "loss": 0.0987, + "step": 1060 + }, + { + "epoch": 3.0, + "step": 1062, + "total_flos": 1.3289249660741878e+18, + "train_loss": 0.4622165315596846, + "train_runtime": 566.114, + "train_samples_per_second": 59.919, + "train_steps_per_second": 1.876 + } + ], + "logging_steps": 5, + "max_steps": 1062, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3289249660741878e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dedfd260895ee336806557ce76d9e8b98e573185 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/15_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77b9eea92a8469aefdc7a5a14e5f919c70742307c1b6eaa50192e1629fd11fa5 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c9f4e1d4d31a41d0b8eebadf1f482c86d71784f8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 16_128_e3_3e-5 + results: [] +--- + + + +# 16_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..199939ae9241bf3a4cf05415c0955a39cdc58c46 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "o_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8547188fda799b203f392074f007fb6eb389eda9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09eedea8b12e095c7a01941c2255b79f1f988ab44075afd17fba5acc7d4edeb5 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..019d3e01ae5893dd89e759c88691ec669fbbd86e --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4633061472419185e+18, + "train_loss": 0.4360632781838111, + "train_runtime": 640.9294, + "train_samples": 12500, + "train_samples_per_second": 58.509, + "train_steps_per_second": 1.83 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..019d3e01ae5893dd89e759c88691ec669fbbd86e --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4633061472419185e+18, + "train_loss": 0.4360632781838111, + "train_runtime": 640.9294, + "train_samples": 12500, + "train_samples_per_second": 58.509, + "train_steps_per_second": 1.83 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c96b79c23821f20c67e421139c3b113fa4f9dfa1 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1681 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1173, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01278772378516624, + "grad_norm": 0.6708197593688965, + "learning_rate": 2.033898305084746e-06, + "loss": 1.6484, + "step": 5 + }, + { + "epoch": 0.02557544757033248, + "grad_norm": 0.5924746990203857, + "learning_rate": 4.576271186440678e-06, + "loss": 1.6314, + "step": 10 + }, + { + "epoch": 0.03836317135549872, + "grad_norm": 0.6122991442680359, + "learning_rate": 7.1186440677966106e-06, + "loss": 1.6006, + "step": 15 + }, + { + "epoch": 0.05115089514066496, + "grad_norm": 0.5629028677940369, + "learning_rate": 9.661016949152542e-06, + "loss": 1.6363, + "step": 20 + }, + { + "epoch": 0.0639386189258312, + "grad_norm": 0.5088094472885132, + "learning_rate": 1.2203389830508475e-05, + "loss": 1.5907, + "step": 25 + }, + { + "epoch": 0.07672634271099744, + "grad_norm": 0.4828494191169739, + "learning_rate": 1.4745762711864408e-05, + "loss": 1.4708, + "step": 30 + }, + { + "epoch": 0.08951406649616368, + "grad_norm": 0.4376075863838196, + "learning_rate": 1.728813559322034e-05, + "loss": 1.5677, + "step": 35 + }, + { + "epoch": 0.10230179028132992, + "grad_norm": 0.48367223143577576, + "learning_rate": 1.983050847457627e-05, + "loss": 1.4916, + "step": 40 + }, + { + "epoch": 0.11508951406649616, + "grad_norm": 0.49526840448379517, + "learning_rate": 2.2372881355932205e-05, + "loss": 1.5044, + "step": 45 + }, + { + "epoch": 0.1278772378516624, + "grad_norm": 0.4703696668148041, + "learning_rate": 2.4915254237288138e-05, + "loss": 1.41, + "step": 50 + }, + { + "epoch": 0.14066496163682865, + "grad_norm": 0.5271356701850891, + "learning_rate": 2.7457627118644068e-05, + "loss": 1.4721, + "step": 55 + }, + { + "epoch": 0.1534526854219949, + "grad_norm": 0.559978723526001, + "learning_rate": 3e-05, + "loss": 1.4434, + "step": 60 + }, + { + "epoch": 0.16624040920716113, + "grad_norm": 0.546524703502655, + "learning_rate": 2.999850884276484e-05, + "loss": 1.4134, + "step": 65 + }, + { + "epoch": 0.17902813299232737, + "grad_norm": 0.5983802080154419, + "learning_rate": 2.999403566753267e-05, + "loss": 1.2958, + "step": 70 + }, + { + "epoch": 0.1918158567774936, + "grad_norm": 0.5582030415534973, + "learning_rate": 2.9986581363664512e-05, + "loss": 1.4038, + "step": 75 + }, + { + "epoch": 0.20460358056265984, + "grad_norm": 0.6500347256660461, + "learning_rate": 2.997614741323225e-05, + "loss": 1.3451, + "step": 80 + }, + { + "epoch": 0.21739130434782608, + "grad_norm": 0.627929151058197, + "learning_rate": 2.9962735890723977e-05, + "loss": 1.2111, + "step": 85 + }, + { + "epoch": 0.23017902813299232, + "grad_norm": 0.6794872879981995, + "learning_rate": 2.994634946263153e-05, + "loss": 1.2138, + "step": 90 + }, + { + "epoch": 0.24296675191815856, + "grad_norm": 0.6936468482017517, + "learning_rate": 2.9926991386920353e-05, + "loss": 1.2341, + "step": 95 + }, + { + "epoch": 0.2557544757033248, + "grad_norm": 0.6493759155273438, + "learning_rate": 2.9904665512381735e-05, + "loss": 1.248, + "step": 100 + }, + { + "epoch": 0.26854219948849106, + "grad_norm": 0.6922633051872253, + "learning_rate": 2.987937627786759e-05, + "loss": 1.1678, + "step": 105 + }, + { + "epoch": 0.2813299232736573, + "grad_norm": 0.7765576243400574, + "learning_rate": 2.985112871140792e-05, + "loss": 1.1653, + "step": 110 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.9598296284675598, + "learning_rate": 2.9819928429211133e-05, + "loss": 1.0831, + "step": 115 + }, + { + "epoch": 0.3069053708439898, + "grad_norm": 0.8309041857719421, + "learning_rate": 2.9785781634547438e-05, + "loss": 1.1585, + "step": 120 + }, + { + "epoch": 0.319693094629156, + "grad_norm": 0.8040237426757812, + "learning_rate": 2.9748695116515496e-05, + "loss": 1.1809, + "step": 125 + }, + { + "epoch": 0.33248081841432225, + "grad_norm": 0.7633826732635498, + "learning_rate": 2.970867624869259e-05, + "loss": 1.1325, + "step": 130 + }, + { + "epoch": 0.3452685421994885, + "grad_norm": 0.82603520154953, + "learning_rate": 2.9665732987668633e-05, + "loss": 1.0174, + "step": 135 + }, + { + "epoch": 0.35805626598465473, + "grad_norm": 0.8003732562065125, + "learning_rate": 2.9619873871464203e-05, + "loss": 1.0127, + "step": 140 + }, + { + "epoch": 0.37084398976982097, + "grad_norm": 0.8985899686813354, + "learning_rate": 2.957110801783303e-05, + "loss": 1.0162, + "step": 145 + }, + { + "epoch": 0.3836317135549872, + "grad_norm": 0.9121374487876892, + "learning_rate": 2.9519445122449174e-05, + "loss": 1.0246, + "step": 150 + }, + { + "epoch": 0.39641943734015345, + "grad_norm": 0.8764989376068115, + "learning_rate": 2.946489545697933e-05, + "loss": 0.9948, + "step": 155 + }, + { + "epoch": 0.4092071611253197, + "grad_norm": 1.0147840976715088, + "learning_rate": 2.9407469867040615e-05, + "loss": 0.9601, + "step": 160 + }, + { + "epoch": 0.4219948849104859, + "grad_norm": 0.9856529831886292, + "learning_rate": 2.9347179770044217e-05, + "loss": 0.946, + "step": 165 + }, + { + "epoch": 0.43478260869565216, + "grad_norm": 0.9447411894798279, + "learning_rate": 2.928403715292538e-05, + "loss": 0.8829, + "step": 170 + }, + { + "epoch": 0.4475703324808184, + "grad_norm": 1.0012602806091309, + "learning_rate": 2.921805456976016e-05, + "loss": 0.9323, + "step": 175 + }, + { + "epoch": 0.46035805626598464, + "grad_norm": 0.9783231019973755, + "learning_rate": 2.914924513926938e-05, + "loss": 0.898, + "step": 180 + }, + { + "epoch": 0.4731457800511509, + "grad_norm": 1.010514259338379, + "learning_rate": 2.9077622542210405e-05, + "loss": 0.9122, + "step": 185 + }, + { + "epoch": 0.4859335038363171, + "grad_norm": 1.0697084665298462, + "learning_rate": 2.9003201018657063e-05, + "loss": 0.8648, + "step": 190 + }, + { + "epoch": 0.49872122762148335, + "grad_norm": 1.0155504941940308, + "learning_rate": 2.8925995365168474e-05, + "loss": 0.8479, + "step": 195 + }, + { + "epoch": 0.5115089514066496, + "grad_norm": 1.0620810985565186, + "learning_rate": 2.8846020931847138e-05, + "loss": 0.857, + "step": 200 + }, + { + "epoch": 0.5242966751918159, + "grad_norm": 1.0387483835220337, + "learning_rate": 2.8763293619287032e-05, + "loss": 0.8479, + "step": 205 + }, + { + "epoch": 0.5370843989769821, + "grad_norm": 1.037909746170044, + "learning_rate": 2.867782987541225e-05, + "loss": 0.8879, + "step": 210 + }, + { + "epoch": 0.5498721227621484, + "grad_norm": 1.189785122871399, + "learning_rate": 2.85896466922068e-05, + "loss": 0.7603, + "step": 215 + }, + { + "epoch": 0.5626598465473146, + "grad_norm": 1.024579405784607, + "learning_rate": 2.849876160233623e-05, + "loss": 0.7762, + "step": 220 + }, + { + "epoch": 0.5754475703324808, + "grad_norm": 1.01882004737854, + "learning_rate": 2.8405192675661782e-05, + "loss": 0.8292, + "step": 225 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.0436152219772339, + "learning_rate": 2.830895851564773e-05, + "loss": 0.7545, + "step": 230 + }, + { + "epoch": 0.6010230179028133, + "grad_norm": 1.1654826402664185, + "learning_rate": 2.82100782556626e-05, + "loss": 0.7818, + "step": 235 + }, + { + "epoch": 0.6138107416879796, + "grad_norm": 1.1212129592895508, + "learning_rate": 2.810857155517507e-05, + "loss": 0.7311, + "step": 240 + }, + { + "epoch": 0.6265984654731458, + "grad_norm": 1.214046597480774, + "learning_rate": 2.8004458595845253e-05, + "loss": 0.7033, + "step": 245 + }, + { + "epoch": 0.639386189258312, + "grad_norm": 1.1791046857833862, + "learning_rate": 2.789776007751216e-05, + "loss": 0.724, + "step": 250 + }, + { + "epoch": 0.6521739130434783, + "grad_norm": 1.237434983253479, + "learning_rate": 2.778849721407814e-05, + "loss": 0.648, + "step": 255 + }, + { + "epoch": 0.6649616368286445, + "grad_norm": 1.2616337537765503, + "learning_rate": 2.7676691729291103e-05, + "loss": 0.6543, + "step": 260 + }, + { + "epoch": 0.6777493606138107, + "grad_norm": 1.3944177627563477, + "learning_rate": 2.756236585242539e-05, + "loss": 0.6693, + "step": 265 + }, + { + "epoch": 0.690537084398977, + "grad_norm": 1.1261426210403442, + "learning_rate": 2.744554231386213e-05, + "loss": 0.726, + "step": 270 + }, + { + "epoch": 0.7033248081841432, + "grad_norm": 1.0663907527923584, + "learning_rate": 2.732624434056996e-05, + "loss": 0.6646, + "step": 275 + }, + { + "epoch": 0.7161125319693095, + "grad_norm": 1.2588634490966797, + "learning_rate": 2.720449565148701e-05, + "loss": 0.6794, + "step": 280 + }, + { + "epoch": 0.7289002557544757, + "grad_norm": 1.134191632270813, + "learning_rate": 2.70803204528051e-05, + "loss": 0.7015, + "step": 285 + }, + { + "epoch": 0.7416879795396419, + "grad_norm": 1.196392297744751, + "learning_rate": 2.695374343315702e-05, + "loss": 0.5483, + "step": 290 + }, + { + "epoch": 0.7544757033248082, + "grad_norm": 1.404404878616333, + "learning_rate": 2.6824789758707913e-05, + "loss": 0.6296, + "step": 295 + }, + { + "epoch": 0.7672634271099744, + "grad_norm": 1.1996374130249023, + "learning_rate": 2.6693485068151756e-05, + "loss": 0.6763, + "step": 300 + }, + { + "epoch": 0.7800511508951407, + "grad_norm": 1.2228668928146362, + "learning_rate": 2.6559855467613774e-05, + "loss": 0.5915, + "step": 305 + }, + { + "epoch": 0.7928388746803069, + "grad_norm": 1.0508066415786743, + "learning_rate": 2.6423927525460067e-05, + "loss": 0.6225, + "step": 310 + }, + { + "epoch": 0.8056265984654731, + "grad_norm": 1.2691847085952759, + "learning_rate": 2.6285728267015212e-05, + "loss": 0.5802, + "step": 315 + }, + { + "epoch": 0.8184143222506394, + "grad_norm": 1.1992753744125366, + "learning_rate": 2.6145285169189106e-05, + "loss": 0.5354, + "step": 320 + }, + { + "epoch": 0.8312020460358056, + "grad_norm": 1.2690272331237793, + "learning_rate": 2.600262615501393e-05, + "loss": 0.613, + "step": 325 + }, + { + "epoch": 0.8439897698209718, + "grad_norm": 1.2552132606506348, + "learning_rate": 2.5857779588092513e-05, + "loss": 0.5931, + "step": 330 + }, + { + "epoch": 0.8567774936061381, + "grad_norm": 1.2072029113769531, + "learning_rate": 2.5710774266959015e-05, + "loss": 0.5015, + "step": 335 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 1.5112553834915161, + "learning_rate": 2.55616394193532e-05, + "loss": 0.5451, + "step": 340 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.2317116260528564, + "learning_rate": 2.541040469640934e-05, + "loss": 0.5527, + "step": 345 + }, + { + "epoch": 0.8951406649616368, + "grad_norm": 1.3959931135177612, + "learning_rate": 2.5257100166760942e-05, + "loss": 0.527, + "step": 350 + }, + { + "epoch": 0.907928388746803, + "grad_norm": 1.1681623458862305, + "learning_rate": 2.5101756310562493e-05, + "loss": 0.4798, + "step": 355 + }, + { + "epoch": 0.9207161125319693, + "grad_norm": 1.1768640279769897, + "learning_rate": 2.4944404013429323e-05, + "loss": 0.5226, + "step": 360 + }, + { + "epoch": 0.9335038363171355, + "grad_norm": 1.1384837627410889, + "learning_rate": 2.4785074560296953e-05, + "loss": 0.4897, + "step": 365 + }, + { + "epoch": 0.9462915601023018, + "grad_norm": 1.3082034587860107, + "learning_rate": 2.462379962920096e-05, + "loss": 0.5298, + "step": 370 + }, + { + "epoch": 0.959079283887468, + "grad_norm": 1.2532881498336792, + "learning_rate": 2.446061128497872e-05, + "loss": 0.5242, + "step": 375 + }, + { + "epoch": 0.9718670076726342, + "grad_norm": 1.2249971628189087, + "learning_rate": 2.429554197289426e-05, + "loss": 0.5255, + "step": 380 + }, + { + "epoch": 0.9846547314578005, + "grad_norm": 1.2708503007888794, + "learning_rate": 2.4128624512187444e-05, + "loss": 0.4544, + "step": 385 + }, + { + "epoch": 0.9974424552429667, + "grad_norm": 1.3073440790176392, + "learning_rate": 2.3959892089548844e-05, + "loss": 0.5898, + "step": 390 + }, + { + "epoch": 1.010230179028133, + "grad_norm": 1.26027250289917, + "learning_rate": 2.3789378252521497e-05, + "loss": 0.4547, + "step": 395 + }, + { + "epoch": 1.0230179028132993, + "grad_norm": 1.3944603204727173, + "learning_rate": 2.3617116902830967e-05, + "loss": 0.4017, + "step": 400 + }, + { + "epoch": 1.0358056265984654, + "grad_norm": 1.3283166885375977, + "learning_rate": 2.3443142289644987e-05, + "loss": 0.3536, + "step": 405 + }, + { + "epoch": 1.0485933503836318, + "grad_norm": 1.2551637887954712, + "learning_rate": 2.3267489002763977e-05, + "loss": 0.447, + "step": 410 + }, + { + "epoch": 1.061381074168798, + "grad_norm": 1.3790340423583984, + "learning_rate": 2.309019196574389e-05, + "loss": 0.4149, + "step": 415 + }, + { + "epoch": 1.0741687979539642, + "grad_norm": 1.2620220184326172, + "learning_rate": 2.2911286428952657e-05, + "loss": 0.4354, + "step": 420 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 1.2144255638122559, + "learning_rate": 2.2730807962561697e-05, + "loss": 0.3738, + "step": 425 + }, + { + "epoch": 1.0997442455242967, + "grad_norm": 1.2086050510406494, + "learning_rate": 2.25487924494738e-05, + "loss": 0.4076, + "step": 430 + }, + { + "epoch": 1.1125319693094629, + "grad_norm": 1.2504605054855347, + "learning_rate": 2.2365276078188864e-05, + "loss": 0.3662, + "step": 435 + }, + { + "epoch": 1.1253196930946292, + "grad_norm": 1.3527156114578247, + "learning_rate": 2.218029533560887e-05, + "loss": 0.3511, + "step": 440 + }, + { + "epoch": 1.1381074168797953, + "grad_norm": 1.2668946981430054, + "learning_rate": 2.19938869997835e-05, + "loss": 0.3876, + "step": 445 + }, + { + "epoch": 1.1508951406649617, + "grad_norm": 1.2953968048095703, + "learning_rate": 2.1806088132597914e-05, + "loss": 0.3339, + "step": 450 + }, + { + "epoch": 1.1636828644501278, + "grad_norm": 1.3277809619903564, + "learning_rate": 2.161693607240405e-05, + "loss": 0.3962, + "step": 455 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 1.3776410818099976, + "learning_rate": 2.142646842659699e-05, + "loss": 0.4055, + "step": 460 + }, + { + "epoch": 1.1892583120204603, + "grad_norm": 1.2561861276626587, + "learning_rate": 2.1234723064137814e-05, + "loss": 0.3588, + "step": 465 + }, + { + "epoch": 1.2020460358056266, + "grad_norm": 1.1957110166549683, + "learning_rate": 2.1041738108024463e-05, + "loss": 0.3313, + "step": 470 + }, + { + "epoch": 1.2148337595907928, + "grad_norm": 1.1657942533493042, + "learning_rate": 2.084755192771208e-05, + "loss": 0.3322, + "step": 475 + }, + { + "epoch": 1.227621483375959, + "grad_norm": 1.2739216089248657, + "learning_rate": 2.0652203131484365e-05, + "loss": 0.326, + "step": 480 + }, + { + "epoch": 1.2404092071611252, + "grad_norm": 1.3207424879074097, + "learning_rate": 2.0455730558777427e-05, + "loss": 0.3135, + "step": 485 + }, + { + "epoch": 1.2531969309462916, + "grad_norm": 1.1956088542938232, + "learning_rate": 2.0258173272457724e-05, + "loss": 0.3378, + "step": 490 + }, + { + "epoch": 1.265984654731458, + "grad_norm": 1.2110832929611206, + "learning_rate": 2.005957055105548e-05, + "loss": 0.3117, + "step": 495 + }, + { + "epoch": 1.278772378516624, + "grad_norm": 1.2596226930618286, + "learning_rate": 1.9859961880955373e-05, + "loss": 0.3597, + "step": 500 + }, + { + "epoch": 1.2915601023017902, + "grad_norm": 1.2189385890960693, + "learning_rate": 1.965938694854575e-05, + "loss": 0.3177, + "step": 505 + }, + { + "epoch": 1.3043478260869565, + "grad_norm": 1.368977665901184, + "learning_rate": 1.9457885632328155e-05, + "loss": 0.3315, + "step": 510 + }, + { + "epoch": 1.317135549872123, + "grad_norm": 1.1617172956466675, + "learning_rate": 1.9255497994988672e-05, + "loss": 0.273, + "step": 515 + }, + { + "epoch": 1.329923273657289, + "grad_norm": 1.3656171560287476, + "learning_rate": 1.9052264275432602e-05, + "loss": 0.2752, + "step": 520 + }, + { + "epoch": 1.3427109974424551, + "grad_norm": 1.1303025484085083, + "learning_rate": 1.8848224880784106e-05, + "loss": 0.3061, + "step": 525 + }, + { + "epoch": 1.3554987212276215, + "grad_norm": 1.2621397972106934, + "learning_rate": 1.8643420378352484e-05, + "loss": 0.2975, + "step": 530 + }, + { + "epoch": 1.3682864450127878, + "grad_norm": 1.2710834741592407, + "learning_rate": 1.843789148756647e-05, + "loss": 0.2851, + "step": 535 + }, + { + "epoch": 1.381074168797954, + "grad_norm": 1.2034056186676025, + "learning_rate": 1.8231679071878406e-05, + "loss": 0.2584, + "step": 540 + }, + { + "epoch": 1.39386189258312, + "grad_norm": 1.2052263021469116, + "learning_rate": 1.8024824130639707e-05, + "loss": 0.2808, + "step": 545 + }, + { + "epoch": 1.4066496163682864, + "grad_norm": 1.2951470613479614, + "learning_rate": 1.7817367790949344e-05, + "loss": 0.2455, + "step": 550 + }, + { + "epoch": 1.4194373401534528, + "grad_norm": 1.2461003065109253, + "learning_rate": 1.7609351299476898e-05, + "loss": 0.2885, + "step": 555 + }, + { + "epoch": 1.432225063938619, + "grad_norm": 1.2099628448486328, + "learning_rate": 1.740081601426188e-05, + "loss": 0.2878, + "step": 560 + }, + { + "epoch": 1.445012787723785, + "grad_norm": 1.3209744691848755, + "learning_rate": 1.719180339649087e-05, + "loss": 0.3, + "step": 565 + }, + { + "epoch": 1.4578005115089514, + "grad_norm": 1.4368692636489868, + "learning_rate": 1.698235500225416e-05, + "loss": 0.2705, + "step": 570 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.3936057090759277, + "learning_rate": 1.6772512474283548e-05, + "loss": 0.3109, + "step": 575 + }, + { + "epoch": 1.4833759590792839, + "grad_norm": 1.3175395727157593, + "learning_rate": 1.6562317533672877e-05, + "loss": 0.2485, + "step": 580 + }, + { + "epoch": 1.49616368286445, + "grad_norm": 1.3272281885147095, + "learning_rate": 1.6351811971583008e-05, + "loss": 0.2509, + "step": 585 + }, + { + "epoch": 1.5089514066496164, + "grad_norm": 1.1471914052963257, + "learning_rate": 1.6141037640932882e-05, + "loss": 0.2834, + "step": 590 + }, + { + "epoch": 1.5217391304347827, + "grad_norm": 1.3747196197509766, + "learning_rate": 1.5930036448078234e-05, + "loss": 0.1966, + "step": 595 + }, + { + "epoch": 1.5345268542199488, + "grad_norm": 1.1667641401290894, + "learning_rate": 1.5718850344479778e-05, + "loss": 0.2451, + "step": 600 + }, + { + "epoch": 1.547314578005115, + "grad_norm": 1.1550166606903076, + "learning_rate": 1.5507521318362372e-05, + "loss": 0.2576, + "step": 605 + }, + { + "epoch": 1.5601023017902813, + "grad_norm": 1.1271246671676636, + "learning_rate": 1.529609138636685e-05, + "loss": 0.2258, + "step": 610 + }, + { + "epoch": 1.5728900255754477, + "grad_norm": 1.2951598167419434, + "learning_rate": 1.5084602585196249e-05, + "loss": 0.2278, + "step": 615 + }, + { + "epoch": 1.5856777493606138, + "grad_norm": 1.2627760171890259, + "learning_rate": 1.4873096963258052e-05, + "loss": 0.2269, + "step": 620 + }, + { + "epoch": 1.59846547314578, + "grad_norm": 1.3025908470153809, + "learning_rate": 1.4661616572304036e-05, + "loss": 0.2389, + "step": 625 + }, + { + "epoch": 1.6112531969309463, + "grad_norm": 1.318843960762024, + "learning_rate": 1.445020345906955e-05, + "loss": 0.2206, + "step": 630 + }, + { + "epoch": 1.6240409207161126, + "grad_norm": 1.26567542552948, + "learning_rate": 1.423889965691372e-05, + "loss": 0.2221, + "step": 635 + }, + { + "epoch": 1.6368286445012787, + "grad_norm": 1.1460903882980347, + "learning_rate": 1.4027747177462318e-05, + "loss": 0.2143, + "step": 640 + }, + { + "epoch": 1.6496163682864449, + "grad_norm": 1.1667035818099976, + "learning_rate": 1.3816788002255019e-05, + "loss": 0.2266, + "step": 645 + }, + { + "epoch": 1.6624040920716112, + "grad_norm": 1.2197387218475342, + "learning_rate": 1.3606064074398544e-05, + "loss": 0.2299, + "step": 650 + }, + { + "epoch": 1.6751918158567776, + "grad_norm": 1.4384185075759888, + "learning_rate": 1.3395617290227505e-05, + "loss": 0.2475, + "step": 655 + }, + { + "epoch": 1.6879795396419437, + "grad_norm": 1.104884386062622, + "learning_rate": 1.3185489490974556e-05, + "loss": 0.2123, + "step": 660 + }, + { + "epoch": 1.7007672634271098, + "grad_norm": 1.1767092943191528, + "learning_rate": 1.2975722454451454e-05, + "loss": 0.2413, + "step": 665 + }, + { + "epoch": 1.7135549872122762, + "grad_norm": 1.2895209789276123, + "learning_rate": 1.2766357886742744e-05, + "loss": 0.231, + "step": 670 + }, + { + "epoch": 1.7263427109974425, + "grad_norm": 1.4212790727615356, + "learning_rate": 1.2557437413913767e-05, + "loss": 0.2357, + "step": 675 + }, + { + "epoch": 1.7391304347826086, + "grad_norm": 1.0976759195327759, + "learning_rate": 1.2349002573734469e-05, + "loss": 0.1906, + "step": 680 + }, + { + "epoch": 1.7519181585677748, + "grad_norm": 1.2768397331237793, + "learning_rate": 1.214109480742084e-05, + "loss": 0.2264, + "step": 685 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 1.154052495956421, + "learning_rate": 1.1933755451395556e-05, + "loss": 0.1991, + "step": 690 + }, + { + "epoch": 1.7774936061381075, + "grad_norm": 1.2788300514221191, + "learning_rate": 1.17270257290694e-05, + "loss": 0.1955, + "step": 695 + }, + { + "epoch": 1.7902813299232738, + "grad_norm": 1.3075954914093018, + "learning_rate": 1.1520946742645184e-05, + "loss": 0.2297, + "step": 700 + }, + { + "epoch": 1.80306905370844, + "grad_norm": 1.1742579936981201, + "learning_rate": 1.13155594649458e-05, + "loss": 0.221, + "step": 705 + }, + { + "epoch": 1.815856777493606, + "grad_norm": 1.3092479705810547, + "learning_rate": 1.111090473126793e-05, + "loss": 0.195, + "step": 710 + }, + { + "epoch": 1.8286445012787724, + "grad_norm": 1.099041223526001, + "learning_rate": 1.0907023231263158e-05, + "loss": 0.1925, + "step": 715 + }, + { + "epoch": 1.8414322250639388, + "grad_norm": 1.1077823638916016, + "learning_rate": 1.0703955500847993e-05, + "loss": 0.1685, + "step": 720 + }, + { + "epoch": 1.854219948849105, + "grad_norm": 1.2507820129394531, + "learning_rate": 1.050174191414449e-05, + "loss": 0.1586, + "step": 725 + }, + { + "epoch": 1.867007672634271, + "grad_norm": 1.124650478363037, + "learning_rate": 1.0300422675453038e-05, + "loss": 0.2043, + "step": 730 + }, + { + "epoch": 1.8797953964194374, + "grad_norm": 1.2655360698699951, + "learning_rate": 1.0100037811258878e-05, + "loss": 0.1762, + "step": 735 + }, + { + "epoch": 1.8925831202046037, + "grad_norm": 1.4875901937484741, + "learning_rate": 9.900627162274017e-06, + "loss": 0.1825, + "step": 740 + }, + { + "epoch": 1.9053708439897699, + "grad_norm": 1.212731957435608, + "learning_rate": 9.702230375516064e-06, + "loss": 0.17, + "step": 745 + }, + { + "epoch": 1.918158567774936, + "grad_norm": 1.3379333019256592, + "learning_rate": 9.504886896425545e-06, + "loss": 0.1647, + "step": 750 + }, + { + "epoch": 1.9309462915601023, + "grad_norm": 1.3294923305511475, + "learning_rate": 9.308635961023348e-06, + "loss": 0.1584, + "step": 755 + }, + { + "epoch": 1.9437340153452687, + "grad_norm": 1.1788673400878906, + "learning_rate": 9.113516588109773e-06, + "loss": 0.177, + "step": 760 + }, + { + "epoch": 1.9565217391304348, + "grad_norm": 1.1877528429031372, + "learning_rate": 8.919567571506777e-06, + "loss": 0.1847, + "step": 765 + }, + { + "epoch": 1.969309462915601, + "grad_norm": 1.0377087593078613, + "learning_rate": 8.72682747234493e-06, + "loss": 0.1691, + "step": 770 + }, + { + "epoch": 1.9820971867007673, + "grad_norm": 1.0700905323028564, + "learning_rate": 8.53533461139669e-06, + "loss": 0.1462, + "step": 775 + }, + { + "epoch": 1.9948849104859336, + "grad_norm": 1.177263617515564, + "learning_rate": 8.3451270614574e-06, + "loss": 0.1729, + "step": 780 + }, + { + "epoch": 2.0076726342710995, + "grad_norm": 1.0120400190353394, + "learning_rate": 8.15624263977563e-06, + "loss": 0.1406, + "step": 785 + }, + { + "epoch": 2.020460358056266, + "grad_norm": 0.9787353873252869, + "learning_rate": 7.968718900534311e-06, + "loss": 0.1385, + "step": 790 + }, + { + "epoch": 2.0332480818414322, + "grad_norm": 1.1840087175369263, + "learning_rate": 7.782593127384184e-06, + "loss": 0.1254, + "step": 795 + }, + { + "epoch": 2.0460358056265986, + "grad_norm": 0.9159743189811707, + "learning_rate": 7.597902326031018e-06, + "loss": 0.1295, + "step": 800 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 1.82992684841156, + "learning_rate": 7.4146832168781085e-06, + "loss": 0.1323, + "step": 805 + }, + { + "epoch": 2.071611253196931, + "grad_norm": 1.0008872747421265, + "learning_rate": 7.232972227725485e-06, + "loss": 0.127, + "step": 810 + }, + { + "epoch": 2.084398976982097, + "grad_norm": 0.9727573990821838, + "learning_rate": 7.052805486527307e-06, + "loss": 0.1021, + "step": 815 + }, + { + "epoch": 2.0971867007672635, + "grad_norm": 0.9443820714950562, + "learning_rate": 6.874218814208863e-06, + "loss": 0.1358, + "step": 820 + }, + { + "epoch": 2.10997442455243, + "grad_norm": 1.031628131866455, + "learning_rate": 6.6972477175446255e-06, + "loss": 0.1203, + "step": 825 + }, + { + "epoch": 2.122762148337596, + "grad_norm": 1.0193157196044922, + "learning_rate": 6.521927382098753e-06, + "loss": 0.1291, + "step": 830 + }, + { + "epoch": 2.135549872122762, + "grad_norm": 1.1664550304412842, + "learning_rate": 6.3482926652294695e-06, + "loss": 0.1332, + "step": 835 + }, + { + "epoch": 2.1483375959079285, + "grad_norm": 0.8780680894851685, + "learning_rate": 6.176378089158686e-06, + "loss": 0.1275, + "step": 840 + }, + { + "epoch": 2.1611253196930944, + "grad_norm": 0.9752290844917297, + "learning_rate": 6.006217834108261e-06, + "loss": 0.1131, + "step": 845 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 1.1901729106903076, + "learning_rate": 5.8378457315042576e-06, + "loss": 0.1042, + "step": 850 + }, + { + "epoch": 2.186700767263427, + "grad_norm": 1.1220062971115112, + "learning_rate": 5.671295257250537e-06, + "loss": 0.1128, + "step": 855 + }, + { + "epoch": 2.1994884910485935, + "grad_norm": 1.0667715072631836, + "learning_rate": 5.506599525073064e-06, + "loss": 0.1159, + "step": 860 + }, + { + "epoch": 2.21227621483376, + "grad_norm": 1.1593586206436157, + "learning_rate": 5.343791279936189e-06, + "loss": 0.1423, + "step": 865 + }, + { + "epoch": 2.2250639386189257, + "grad_norm": 1.3152402639389038, + "learning_rate": 5.182902891532267e-06, + "loss": 0.1321, + "step": 870 + }, + { + "epoch": 2.237851662404092, + "grad_norm": 1.1431974172592163, + "learning_rate": 5.023966347845892e-06, + "loss": 0.1168, + "step": 875 + }, + { + "epoch": 2.2506393861892584, + "grad_norm": 0.925300121307373, + "learning_rate": 4.867013248794e-06, + "loss": 0.1025, + "step": 880 + }, + { + "epoch": 2.2634271099744243, + "grad_norm": 0.8030281066894531, + "learning_rate": 4.712074799943158e-06, + "loss": 0.0956, + "step": 885 + }, + { + "epoch": 2.2762148337595907, + "grad_norm": 1.179619312286377, + "learning_rate": 4.5591818063052315e-06, + "loss": 0.116, + "step": 890 + }, + { + "epoch": 2.289002557544757, + "grad_norm": 0.837775468826294, + "learning_rate": 4.408364666212712e-06, + "loss": 0.0973, + "step": 895 + }, + { + "epoch": 2.3017902813299234, + "grad_norm": 1.0384538173675537, + "learning_rate": 4.2596533652748836e-06, + "loss": 0.1182, + "step": 900 + }, + { + "epoch": 2.3145780051150897, + "grad_norm": 1.07869291305542, + "learning_rate": 4.113077470416057e-06, + "loss": 0.1167, + "step": 905 + }, + { + "epoch": 2.3273657289002556, + "grad_norm": 0.9032118320465088, + "learning_rate": 3.9686661239970466e-06, + "loss": 0.1282, + "step": 910 + }, + { + "epoch": 2.340153452685422, + "grad_norm": 0.9700736403465271, + "learning_rate": 3.8264480380210686e-06, + "loss": 0.1029, + "step": 915 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.8310509324073792, + "learning_rate": 3.6864514884251648e-06, + "loss": 0.0866, + "step": 920 + }, + { + "epoch": 2.3657289002557547, + "grad_norm": 0.9454623460769653, + "learning_rate": 3.5487043094583756e-06, + "loss": 0.104, + "step": 925 + }, + { + "epoch": 2.3785166240409206, + "grad_norm": 1.1522465944290161, + "learning_rate": 3.413233888147715e-06, + "loss": 0.0998, + "step": 930 + }, + { + "epoch": 2.391304347826087, + "grad_norm": 0.90438312292099, + "learning_rate": 3.280067158853034e-06, + "loss": 0.1041, + "step": 935 + }, + { + "epoch": 2.4040920716112533, + "grad_norm": 0.9750096797943115, + "learning_rate": 3.149230597911907e-06, + "loss": 0.1134, + "step": 940 + }, + { + "epoch": 2.4168797953964196, + "grad_norm": 0.8599918484687805, + "learning_rate": 3.020750218375605e-06, + "loss": 0.0889, + "step": 945 + }, + { + "epoch": 2.4296675191815855, + "grad_norm": 0.9393445253372192, + "learning_rate": 2.8946515648371303e-06, + "loss": 0.0948, + "step": 950 + }, + { + "epoch": 2.442455242966752, + "grad_norm": 0.8611547350883484, + "learning_rate": 2.770959708352418e-06, + "loss": 0.0973, + "step": 955 + }, + { + "epoch": 2.455242966751918, + "grad_norm": 1.2510979175567627, + "learning_rate": 2.6496992414557053e-06, + "loss": 0.1086, + "step": 960 + }, + { + "epoch": 2.4680306905370846, + "grad_norm": 0.8778414726257324, + "learning_rate": 2.530894273270002e-06, + "loss": 0.1161, + "step": 965 + }, + { + "epoch": 2.4808184143222505, + "grad_norm": 0.8699662685394287, + "learning_rate": 2.4145684247136807e-06, + "loss": 0.1014, + "step": 970 + }, + { + "epoch": 2.493606138107417, + "grad_norm": 0.9918726682662964, + "learning_rate": 2.300744823804181e-06, + "loss": 0.1004, + "step": 975 + }, + { + "epoch": 2.506393861892583, + "grad_norm": 0.8336577415466309, + "learning_rate": 2.1894461010596396e-06, + "loss": 0.0953, + "step": 980 + }, + { + "epoch": 2.5191815856777495, + "grad_norm": 0.8095061779022217, + "learning_rate": 2.080694384999469e-06, + "loss": 0.0905, + "step": 985 + }, + { + "epoch": 2.531969309462916, + "grad_norm": 0.759183943271637, + "learning_rate": 1.974511297744782e-06, + "loss": 0.084, + "step": 990 + }, + { + "epoch": 2.544757033248082, + "grad_norm": 0.9603663682937622, + "learning_rate": 1.8709179507194158e-06, + "loss": 0.0951, + "step": 995 + }, + { + "epoch": 2.557544757033248, + "grad_norm": 0.7357624173164368, + "learning_rate": 1.769934940452554e-06, + "loss": 0.1016, + "step": 1000 + }, + { + "epoch": 2.5703324808184145, + "grad_norm": 0.812207043170929, + "learning_rate": 1.6715823444837241e-06, + "loss": 0.0888, + "step": 1005 + }, + { + "epoch": 2.5831202046035804, + "grad_norm": 0.8508069515228271, + "learning_rate": 1.5758797173709327e-06, + "loss": 0.1098, + "step": 1010 + }, + { + "epoch": 2.5959079283887467, + "grad_norm": 0.7347028851509094, + "learning_rate": 1.4828460868028277e-06, + "loss": 0.0944, + "step": 1015 + }, + { + "epoch": 2.608695652173913, + "grad_norm": 0.6602727174758911, + "learning_rate": 1.3924999498155832e-06, + "loss": 0.078, + "step": 1020 + }, + { + "epoch": 2.6214833759590794, + "grad_norm": 0.7060046195983887, + "learning_rate": 1.3048592691153137e-06, + "loss": 0.1022, + "step": 1025 + }, + { + "epoch": 2.634271099744246, + "grad_norm": 0.730272650718689, + "learning_rate": 1.2199414695067001e-06, + "loss": 0.0818, + "step": 1030 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.9762530326843262, + "learning_rate": 1.1377634344285826e-06, + "loss": 0.0964, + "step": 1035 + }, + { + "epoch": 2.659846547314578, + "grad_norm": 0.8229208588600159, + "learning_rate": 1.0583415025971693e-06, + "loss": 0.096, + "step": 1040 + }, + { + "epoch": 2.6726342710997444, + "grad_norm": 0.635673999786377, + "learning_rate": 9.816914647575653e-07, + "loss": 0.1042, + "step": 1045 + }, + { + "epoch": 2.6854219948849103, + "grad_norm": 0.836639404296875, + "learning_rate": 9.078285605442365e-07, + "loss": 0.1065, + "step": 1050 + }, + { + "epoch": 2.6982097186700766, + "grad_norm": 0.730293869972229, + "learning_rate": 8.36767475451054e-07, + "loss": 0.0955, + "step": 1055 + }, + { + "epoch": 2.710997442455243, + "grad_norm": 0.9207515120506287, + "learning_rate": 7.685223379115075e-07, + "loss": 0.11, + "step": 1060 + }, + { + "epoch": 2.7237851662404093, + "grad_norm": 0.8018400073051453, + "learning_rate": 7.031067164896776e-07, + "loss": 0.0899, + "step": 1065 + }, + { + "epoch": 2.7365728900255757, + "grad_norm": 0.780031681060791, + "learning_rate": 6.405336171825222e-07, + "loss": 0.0981, + "step": 1070 + }, + { + "epoch": 2.7493606138107416, + "grad_norm": 0.7321682572364807, + "learning_rate": 5.808154808340077e-07, + "loss": 0.0831, + "step": 1075 + }, + { + "epoch": 2.762148337595908, + "grad_norm": 0.6939735412597656, + "learning_rate": 5.239641806616119e-07, + "loss": 0.08, + "step": 1080 + }, + { + "epoch": 2.7749360613810743, + "grad_norm": 0.8114351630210876, + "learning_rate": 4.6999101989568136e-07, + "loss": 0.087, + "step": 1085 + }, + { + "epoch": 2.78772378516624, + "grad_norm": 0.7863523364067078, + "learning_rate": 4.1890672953210475e-07, + "loss": 0.0798, + "step": 1090 + }, + { + "epoch": 2.8005115089514065, + "grad_norm": 0.7235757112503052, + "learning_rate": 3.70721466198774e-07, + "loss": 0.0945, + "step": 1095 + }, + { + "epoch": 2.813299232736573, + "grad_norm": 0.8924123048782349, + "learning_rate": 3.2544481013622673e-07, + "loss": 0.0945, + "step": 1100 + }, + { + "epoch": 2.8260869565217392, + "grad_norm": 0.7142847776412964, + "learning_rate": 2.8308576329290125e-07, + "loss": 0.0839, + "step": 1105 + }, + { + "epoch": 2.8388746803069056, + "grad_norm": 0.7913419604301453, + "learning_rate": 2.436527475353517e-07, + "loss": 0.0894, + "step": 1110 + }, + { + "epoch": 2.8516624040920715, + "grad_norm": 0.818788468837738, + "learning_rate": 2.0715360297381746e-07, + "loss": 0.0926, + "step": 1115 + }, + { + "epoch": 2.864450127877238, + "grad_norm": 0.9289286136627197, + "learning_rate": 1.735955864034233e-07, + "loss": 0.0883, + "step": 1120 + }, + { + "epoch": 2.877237851662404, + "grad_norm": 0.7062421441078186, + "learning_rate": 1.4298536986139865e-07, + "loss": 0.0834, + "step": 1125 + }, + { + "epoch": 2.89002557544757, + "grad_norm": 0.7538238763809204, + "learning_rate": 1.1532903930053018e-07, + "loss": 0.1026, + "step": 1130 + }, + { + "epoch": 2.9028132992327365, + "grad_norm": 0.7157092094421387, + "learning_rate": 9.063209337913492e-08, + "loss": 0.0992, + "step": 1135 + }, + { + "epoch": 2.915601023017903, + "grad_norm": 0.8148689270019531, + "learning_rate": 6.889944236782631e-08, + "loss": 0.0844, + "step": 1140 + }, + { + "epoch": 2.928388746803069, + "grad_norm": 0.8606677651405334, + "learning_rate": 5.0135407173245163e-08, + "loss": 0.0823, + "step": 1145 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.77100670337677, + "learning_rate": 3.434371847897022e-08, + "loss": 0.0988, + "step": 1150 + }, + { + "epoch": 2.9539641943734014, + "grad_norm": 0.8226373195648193, + "learning_rate": 2.1527516003781443e-08, + "loss": 0.0953, + "step": 1155 + }, + { + "epoch": 2.9667519181585678, + "grad_norm": 0.7256613969802856, + "learning_rate": 1.1689347877419377e-08, + "loss": 0.1076, + "step": 1160 + }, + { + "epoch": 2.979539641943734, + "grad_norm": 0.7007017731666565, + "learning_rate": 4.831170133960394e-09, + "loss": 0.0828, + "step": 1165 + }, + { + "epoch": 2.9923273657289, + "grad_norm": 0.7223325967788696, + "learning_rate": 9.543463229177984e-10, + "loss": 0.089, + "step": 1170 + }, + { + "epoch": 3.0, + "step": 1173, + "total_flos": 1.4633061472419185e+18, + "train_loss": 0.4360632781838111, + "train_runtime": 640.9294, + "train_samples_per_second": 58.509, + "train_steps_per_second": 1.83 + } + ], + "logging_steps": 5, + "max_steps": 1173, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4633061472419185e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbb020d5c9a82fbc16fd7fd34e1d54855d3f4a88 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/16_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f726917afce5c080946d11e533b2e021de3d58556d793dbe02be1ac8a171a92f +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1303062eccff60e561dd4ea30b4b11d2cd77acf7 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 17_128_e3_3e-5 + results: [] +--- + + + +# 17_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6c0e0f436cc6f990819e094235cf4e90df5447c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "down_proj", + "v_proj", + "gate_proj", + "q_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f1374b10d9b389f1719215fd5aa88f51b166aa0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908b54870ed0d0cc48505c5b3aeb2e3e47d3300e0ccd6d9efe76239cd31baace +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dec95c856165310a260201ca75ad700009ff146f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.6798389506943222e+18, + "train_loss": 0.437512752876742, + "train_runtime": 720.9786, + "train_samples": 14138, + "train_samples_per_second": 58.828, + "train_steps_per_second": 1.839 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dec95c856165310a260201ca75ad700009ff146f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.6798389506943222e+18, + "train_loss": 0.437512752876742, + "train_runtime": 720.9786, + "train_samples": 14138, + "train_samples_per_second": 58.828, + "train_steps_per_second": 1.839 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1bfbe40a70115d0e7ed89f3dc957f37477941f79 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1898 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1326, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011312217194570135, + "grad_norm": 0.6517213582992554, + "learning_rate": 1.791044776119403e-06, + "loss": 1.645, + "step": 5 + }, + { + "epoch": 0.02262443438914027, + "grad_norm": 0.6654043793678284, + "learning_rate": 4.029850746268657e-06, + "loss": 1.6306, + "step": 10 + }, + { + "epoch": 0.033936651583710405, + "grad_norm": 0.6435283422470093, + "learning_rate": 6.268656716417911e-06, + "loss": 1.6684, + "step": 15 + }, + { + "epoch": 0.04524886877828054, + "grad_norm": 0.5259317755699158, + "learning_rate": 8.507462686567164e-06, + "loss": 1.6181, + "step": 20 + }, + { + "epoch": 0.05656108597285068, + "grad_norm": 0.5409759283065796, + "learning_rate": 1.0746268656716418e-05, + "loss": 1.6287, + "step": 25 + }, + { + "epoch": 0.06787330316742081, + "grad_norm": 0.498606413602829, + "learning_rate": 1.2985074626865672e-05, + "loss": 1.6214, + "step": 30 + }, + { + "epoch": 0.07918552036199095, + "grad_norm": 0.4658817648887634, + "learning_rate": 1.5223880597014927e-05, + "loss": 1.589, + "step": 35 + }, + { + "epoch": 0.09049773755656108, + "grad_norm": 0.4615864157676697, + "learning_rate": 1.746268656716418e-05, + "loss": 1.5534, + "step": 40 + }, + { + "epoch": 0.10180995475113122, + "grad_norm": 0.5203776955604553, + "learning_rate": 1.9701492537313435e-05, + "loss": 1.6377, + "step": 45 + }, + { + "epoch": 0.11312217194570136, + "grad_norm": 0.4978821873664856, + "learning_rate": 2.194029850746269e-05, + "loss": 1.5162, + "step": 50 + }, + { + "epoch": 0.1244343891402715, + "grad_norm": 0.4931459426879883, + "learning_rate": 2.417910447761194e-05, + "loss": 1.5408, + "step": 55 + }, + { + "epoch": 0.13574660633484162, + "grad_norm": 0.5344429612159729, + "learning_rate": 2.6417910447761193e-05, + "loss": 1.4441, + "step": 60 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 0.580712080001831, + "learning_rate": 2.8656716417910447e-05, + "loss": 1.4668, + "step": 65 + }, + { + "epoch": 0.1583710407239819, + "grad_norm": 0.6279180645942688, + "learning_rate": 2.9999813203541335e-05, + "loss": 1.4357, + "step": 70 + }, + { + "epoch": 0.16968325791855204, + "grad_norm": 0.6121801733970642, + "learning_rate": 2.9997711796810774e-05, + "loss": 1.389, + "step": 75 + }, + { + "epoch": 0.18099547511312217, + "grad_norm": 0.5477138161659241, + "learning_rate": 2.9993275815975943e-05, + "loss": 1.3605, + "step": 80 + }, + { + "epoch": 0.19230769230769232, + "grad_norm": 0.6352965831756592, + "learning_rate": 2.9986505951550574e-05, + "loss": 1.3698, + "step": 85 + }, + { + "epoch": 0.20361990950226244, + "grad_norm": 0.6573954820632935, + "learning_rate": 2.9977403257345435e-05, + "loss": 1.3599, + "step": 90 + }, + { + "epoch": 0.2149321266968326, + "grad_norm": 0.6235343217849731, + "learning_rate": 2.996596915030429e-05, + "loss": 1.2814, + "step": 95 + }, + { + "epoch": 0.22624434389140272, + "grad_norm": 0.5989724397659302, + "learning_rate": 2.995220541028333e-05, + "loss": 1.3335, + "step": 100 + }, + { + "epoch": 0.23755656108597284, + "grad_norm": 0.6992103457450867, + "learning_rate": 2.9936114179774118e-05, + "loss": 1.2714, + "step": 105 + }, + { + "epoch": 0.248868778280543, + "grad_norm": 0.6993058323860168, + "learning_rate": 2.991769796357009e-05, + "loss": 1.2289, + "step": 110 + }, + { + "epoch": 0.26018099547511314, + "grad_norm": 0.7817025780677795, + "learning_rate": 2.9896959628376653e-05, + "loss": 1.2258, + "step": 115 + }, + { + "epoch": 0.27149321266968324, + "grad_norm": 0.7459224462509155, + "learning_rate": 2.987390240236494e-05, + "loss": 1.2034, + "step": 120 + }, + { + "epoch": 0.2828054298642534, + "grad_norm": 0.7422584295272827, + "learning_rate": 2.984852987466931e-05, + "loss": 1.1956, + "step": 125 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.7807568907737732, + "learning_rate": 2.982084599482867e-05, + "loss": 1.1751, + "step": 130 + }, + { + "epoch": 0.3054298642533937, + "grad_norm": 0.9192548394203186, + "learning_rate": 2.979085507217165e-05, + "loss": 1.1872, + "step": 135 + }, + { + "epoch": 0.3167420814479638, + "grad_norm": 0.7574413418769836, + "learning_rate": 2.9758561775145837e-05, + "loss": 1.1114, + "step": 140 + }, + { + "epoch": 0.32805429864253394, + "grad_norm": 0.9246029853820801, + "learning_rate": 2.9723971130591053e-05, + "loss": 1.0558, + "step": 145 + }, + { + "epoch": 0.3393665158371041, + "grad_norm": 0.8904193043708801, + "learning_rate": 2.9687088522956894e-05, + "loss": 1.1113, + "step": 150 + }, + { + "epoch": 0.3506787330316742, + "grad_norm": 0.895426869392395, + "learning_rate": 2.9647919693464532e-05, + "loss": 1.0484, + "step": 155 + }, + { + "epoch": 0.36199095022624433, + "grad_norm": 0.8778782486915588, + "learning_rate": 2.9606470739213066e-05, + "loss": 1.1112, + "step": 160 + }, + { + "epoch": 0.3733031674208145, + "grad_norm": 1.0742566585540771, + "learning_rate": 2.956274811223042e-05, + "loss": 0.9584, + "step": 165 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 0.8409031629562378, + "learning_rate": 2.9516758618468994e-05, + "loss": 1.057, + "step": 170 + }, + { + "epoch": 0.39592760180995473, + "grad_norm": 0.9990507364273071, + "learning_rate": 2.9468509416746267e-05, + "loss": 0.9637, + "step": 175 + }, + { + "epoch": 0.4072398190045249, + "grad_norm": 1.022527813911438, + "learning_rate": 2.9418008017630402e-05, + "loss": 0.9378, + "step": 180 + }, + { + "epoch": 0.41855203619909503, + "grad_norm": 1.003588080406189, + "learning_rate": 2.9365262282271173e-05, + "loss": 0.9782, + "step": 185 + }, + { + "epoch": 0.4298642533936652, + "grad_norm": 1.0717016458511353, + "learning_rate": 2.9310280421176255e-05, + "loss": 1.0017, + "step": 190 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 1.1871888637542725, + "learning_rate": 2.925307099293318e-05, + "loss": 0.9164, + "step": 195 + }, + { + "epoch": 0.45248868778280543, + "grad_norm": 1.0288134813308716, + "learning_rate": 2.9193642902877077e-05, + "loss": 0.9169, + "step": 200 + }, + { + "epoch": 0.4638009049773756, + "grad_norm": 1.0674618482589722, + "learning_rate": 2.9132005401704442e-05, + "loss": 0.9483, + "step": 205 + }, + { + "epoch": 0.4751131221719457, + "grad_norm": 0.9838318824768066, + "learning_rate": 2.906816808403319e-05, + "loss": 0.9891, + "step": 210 + }, + { + "epoch": 0.48642533936651583, + "grad_norm": 1.0710904598236084, + "learning_rate": 2.9002140886909087e-05, + "loss": 0.8226, + "step": 215 + }, + { + "epoch": 0.497737556561086, + "grad_norm": 1.068298101425171, + "learning_rate": 2.893393408825898e-05, + "loss": 0.7942, + "step": 220 + }, + { + "epoch": 0.5090497737556561, + "grad_norm": 1.2012892961502075, + "learning_rate": 2.886355830529088e-05, + "loss": 0.8183, + "step": 225 + }, + { + "epoch": 0.5203619909502263, + "grad_norm": 1.1616811752319336, + "learning_rate": 2.8791024492841274e-05, + "loss": 0.8241, + "step": 230 + }, + { + "epoch": 0.5316742081447964, + "grad_norm": 1.1439414024353027, + "learning_rate": 2.8716343941669888e-05, + "loss": 0.8019, + "step": 235 + }, + { + "epoch": 0.5429864253393665, + "grad_norm": 1.079946517944336, + "learning_rate": 2.863952827670212e-05, + "loss": 0.864, + "step": 240 + }, + { + "epoch": 0.5542986425339367, + "grad_norm": 1.1706621646881104, + "learning_rate": 2.8560589455219503e-05, + "loss": 0.7166, + "step": 245 + }, + { + "epoch": 0.5656108597285068, + "grad_norm": 1.1154924631118774, + "learning_rate": 2.8479539764998393e-05, + "loss": 0.8244, + "step": 250 + }, + { + "epoch": 0.5769230769230769, + "grad_norm": 1.160692572593689, + "learning_rate": 2.8396391822397238e-05, + "loss": 0.8, + "step": 255 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1117918491363525, + "learning_rate": 2.8311158570392694e-05, + "loss": 0.7389, + "step": 260 + }, + { + "epoch": 0.5995475113122172, + "grad_norm": 1.1170542240142822, + "learning_rate": 2.822385327656488e-05, + "loss": 0.7368, + "step": 265 + }, + { + "epoch": 0.6108597285067874, + "grad_norm": 1.1377840042114258, + "learning_rate": 2.8134489531032144e-05, + "loss": 0.7176, + "step": 270 + }, + { + "epoch": 0.6221719457013575, + "grad_norm": 1.215258240699768, + "learning_rate": 2.804308124433557e-05, + "loss": 0.7581, + "step": 275 + }, + { + "epoch": 0.6334841628959276, + "grad_norm": 1.1784403324127197, + "learning_rate": 2.794964264527365e-05, + "loss": 0.6722, + "step": 280 + }, + { + "epoch": 0.6447963800904978, + "grad_norm": 1.2706748247146606, + "learning_rate": 2.78541882786874e-05, + "loss": 0.732, + "step": 285 + }, + { + "epoch": 0.6561085972850679, + "grad_norm": 1.1579205989837646, + "learning_rate": 2.7756733003196287e-05, + "loss": 0.731, + "step": 290 + }, + { + "epoch": 0.667420814479638, + "grad_norm": 1.1493475437164307, + "learning_rate": 2.765729198888529e-05, + "loss": 0.7093, + "step": 295 + }, + { + "epoch": 0.6787330316742082, + "grad_norm": 1.2283833026885986, + "learning_rate": 2.7555880714943506e-05, + "loss": 0.7031, + "step": 300 + }, + { + "epoch": 0.6900452488687783, + "grad_norm": 1.2910563945770264, + "learning_rate": 2.745251496725462e-05, + "loss": 0.6855, + "step": 305 + }, + { + "epoch": 0.7013574660633484, + "grad_norm": 1.2336117029190063, + "learning_rate": 2.7347210835939657e-05, + "loss": 0.6507, + "step": 310 + }, + { + "epoch": 0.7126696832579186, + "grad_norm": 1.2348319292068481, + "learning_rate": 2.7239984712852344e-05, + "loss": 0.6733, + "step": 315 + }, + { + "epoch": 0.7239819004524887, + "grad_norm": 1.1727352142333984, + "learning_rate": 2.7130853289027526e-05, + "loss": 0.6838, + "step": 320 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.3151297569274902, + "learning_rate": 2.7019833552083016e-05, + "loss": 0.6322, + "step": 325 + }, + { + "epoch": 0.746606334841629, + "grad_norm": 1.112032413482666, + "learning_rate": 2.6906942783575258e-05, + "loss": 0.6224, + "step": 330 + }, + { + "epoch": 0.7579185520361991, + "grad_norm": 1.251057505607605, + "learning_rate": 2.679219855630925e-05, + "loss": 0.6033, + "step": 335 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.312796950340271, + "learning_rate": 2.6675618731603107e-05, + "loss": 0.5809, + "step": 340 + }, + { + "epoch": 0.7805429864253394, + "grad_norm": 1.3027918338775635, + "learning_rate": 2.6557221456507775e-05, + "loss": 0.5874, + "step": 345 + }, + { + "epoch": 0.7918552036199095, + "grad_norm": 1.1726665496826172, + "learning_rate": 2.643702516098218e-05, + "loss": 0.639, + "step": 350 + }, + { + "epoch": 0.8031674208144797, + "grad_norm": 1.1864731311798096, + "learning_rate": 2.6315048555024396e-05, + "loss": 0.5773, + "step": 355 + }, + { + "epoch": 0.8144796380090498, + "grad_norm": 1.3082315921783447, + "learning_rate": 2.6191310625759232e-05, + "loss": 0.6098, + "step": 360 + }, + { + "epoch": 0.8257918552036199, + "grad_norm": 1.2955681085586548, + "learning_rate": 2.6065830634482625e-05, + "loss": 0.5795, + "step": 365 + }, + { + "epoch": 0.8371040723981901, + "grad_norm": 1.2314718961715698, + "learning_rate": 2.5938628113663415e-05, + "loss": 0.5281, + "step": 370 + }, + { + "epoch": 0.8484162895927602, + "grad_norm": 1.335590124130249, + "learning_rate": 2.5809722863902857e-05, + "loss": 0.5594, + "step": 375 + }, + { + "epoch": 0.8597285067873304, + "grad_norm": 1.1286859512329102, + "learning_rate": 2.567913495085244e-05, + "loss": 0.4919, + "step": 380 + }, + { + "epoch": 0.8710407239819005, + "grad_norm": 1.3524227142333984, + "learning_rate": 2.554688470209041e-05, + "loss": 0.5757, + "step": 385 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.2273483276367188, + "learning_rate": 2.5412992703957556e-05, + "loss": 0.5179, + "step": 390 + }, + { + "epoch": 0.8936651583710408, + "grad_norm": 1.3723407983779907, + "learning_rate": 2.5277479798352682e-05, + "loss": 0.4531, + "step": 395 + }, + { + "epoch": 0.9049773755656109, + "grad_norm": 1.1838219165802002, + "learning_rate": 2.514036707948833e-05, + "loss": 0.4766, + "step": 400 + }, + { + "epoch": 0.916289592760181, + "grad_norm": 1.2562397718429565, + "learning_rate": 2.5001675890607195e-05, + "loss": 0.4903, + "step": 405 + }, + { + "epoch": 0.9276018099547512, + "grad_norm": 1.2931199073791504, + "learning_rate": 2.4861427820659813e-05, + "loss": 0.5661, + "step": 410 + }, + { + "epoch": 0.9389140271493213, + "grad_norm": 1.2278292179107666, + "learning_rate": 2.471964470094396e-05, + "loss": 0.4575, + "step": 415 + }, + { + "epoch": 0.9502262443438914, + "grad_norm": 1.2152196168899536, + "learning_rate": 2.4576348601706366e-05, + "loss": 0.5391, + "step": 420 + }, + { + "epoch": 0.9615384615384616, + "grad_norm": 1.3050040006637573, + "learning_rate": 2.4431561828707208e-05, + "loss": 0.5065, + "step": 425 + }, + { + "epoch": 0.9728506787330317, + "grad_norm": 1.3901944160461426, + "learning_rate": 2.428530691974795e-05, + "loss": 0.4822, + "step": 430 + }, + { + "epoch": 0.9841628959276018, + "grad_norm": 1.1721259355545044, + "learning_rate": 2.4137606641163064e-05, + "loss": 0.5213, + "step": 435 + }, + { + "epoch": 0.995475113122172, + "grad_norm": 1.3450927734375, + "learning_rate": 2.3988483984276174e-05, + "loss": 0.4552, + "step": 440 + }, + { + "epoch": 1.006787330316742, + "grad_norm": 1.2216827869415283, + "learning_rate": 2.3837962161821183e-05, + "loss": 0.4877, + "step": 445 + }, + { + "epoch": 1.0180995475113122, + "grad_norm": 1.35059654712677, + "learning_rate": 2.368606460432894e-05, + "loss": 0.3684, + "step": 450 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 1.3559701442718506, + "learning_rate": 2.353281495647998e-05, + "loss": 0.4406, + "step": 455 + }, + { + "epoch": 1.0407239819004526, + "grad_norm": 1.2346910238265991, + "learning_rate": 2.3378237073423957e-05, + "loss": 0.4315, + "step": 460 + }, + { + "epoch": 1.0520361990950227, + "grad_norm": 1.153168797492981, + "learning_rate": 2.322235501706629e-05, + "loss": 0.3837, + "step": 465 + }, + { + "epoch": 1.0633484162895928, + "grad_norm": 1.3708627223968506, + "learning_rate": 2.3065193052322667e-05, + "loss": 0.3789, + "step": 470 + }, + { + "epoch": 1.0746606334841629, + "grad_norm": 1.393190622329712, + "learning_rate": 2.2906775643341883e-05, + "loss": 0.4062, + "step": 475 + }, + { + "epoch": 1.085972850678733, + "grad_norm": 1.7389261722564697, + "learning_rate": 2.274712744969772e-05, + "loss": 0.4172, + "step": 480 + }, + { + "epoch": 1.0972850678733033, + "grad_norm": 1.2809358835220337, + "learning_rate": 2.2586273322550404e-05, + "loss": 0.3928, + "step": 485 + }, + { + "epoch": 1.1085972850678734, + "grad_norm": 1.1811197996139526, + "learning_rate": 2.2424238300778176e-05, + "loss": 0.3557, + "step": 490 + }, + { + "epoch": 1.1199095022624435, + "grad_norm": 1.2107412815093994, + "learning_rate": 2.226104760707974e-05, + "loss": 0.3998, + "step": 495 + }, + { + "epoch": 1.1312217194570136, + "grad_norm": 1.255383014678955, + "learning_rate": 2.2096726644048016e-05, + "loss": 0.3546, + "step": 500 + }, + { + "epoch": 1.1425339366515836, + "grad_norm": 1.4037586450576782, + "learning_rate": 2.1931300990215943e-05, + "loss": 0.3452, + "step": 505 + }, + { + "epoch": 1.1538461538461537, + "grad_norm": 1.2695441246032715, + "learning_rate": 2.176479639607485e-05, + "loss": 0.3825, + "step": 510 + }, + { + "epoch": 1.165158371040724, + "grad_norm": 1.36899733543396, + "learning_rate": 2.159723878006609e-05, + "loss": 0.3274, + "step": 515 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 1.3221948146820068, + "learning_rate": 2.142865422454654e-05, + "loss": 0.3866, + "step": 520 + }, + { + "epoch": 1.1877828054298643, + "grad_norm": 1.2656069993972778, + "learning_rate": 2.1259068971728547e-05, + "loss": 0.3475, + "step": 525 + }, + { + "epoch": 1.1990950226244343, + "grad_norm": 1.2648141384124756, + "learning_rate": 2.1088509419595007e-05, + "loss": 0.3253, + "step": 530 + }, + { + "epoch": 1.2104072398190044, + "grad_norm": 1.4578686952590942, + "learning_rate": 2.0917002117790247e-05, + "loss": 0.3602, + "step": 535 + }, + { + "epoch": 1.2217194570135748, + "grad_norm": 1.1993721723556519, + "learning_rate": 2.0744573763487195e-05, + "loss": 0.3345, + "step": 540 + }, + { + "epoch": 1.2330316742081449, + "grad_norm": 1.2041844129562378, + "learning_rate": 2.057125119723168e-05, + "loss": 0.3446, + "step": 545 + }, + { + "epoch": 1.244343891402715, + "grad_norm": 1.3378663063049316, + "learning_rate": 2.0397061398764367e-05, + "loss": 0.3223, + "step": 550 + }, + { + "epoch": 1.255656108597285, + "grad_norm": 1.3779833316802979, + "learning_rate": 2.0222031482821033e-05, + "loss": 0.3164, + "step": 555 + }, + { + "epoch": 1.2669683257918551, + "grad_norm": 1.1973909139633179, + "learning_rate": 2.004618869491186e-05, + "loss": 0.3021, + "step": 560 + }, + { + "epoch": 1.2782805429864252, + "grad_norm": 1.3270084857940674, + "learning_rate": 1.9869560407080295e-05, + "loss": 0.2992, + "step": 565 + }, + { + "epoch": 1.2895927601809956, + "grad_norm": 1.427246332168579, + "learning_rate": 1.9692174113642307e-05, + "loss": 0.3322, + "step": 570 + }, + { + "epoch": 1.3009049773755657, + "grad_norm": 1.3284002542495728, + "learning_rate": 1.9514057426906536e-05, + "loss": 0.3118, + "step": 575 + }, + { + "epoch": 1.3122171945701357, + "grad_norm": 1.3854244947433472, + "learning_rate": 1.933523807287612e-05, + "loss": 0.3036, + "step": 580 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 1.2437269687652588, + "learning_rate": 1.9155743886932825e-05, + "loss": 0.3251, + "step": 585 + }, + { + "epoch": 1.334841628959276, + "grad_norm": 1.4309046268463135, + "learning_rate": 1.8975602809504086e-05, + "loss": 0.3128, + "step": 590 + }, + { + "epoch": 1.3461538461538463, + "grad_norm": 1.222485899925232, + "learning_rate": 1.8794842881713793e-05, + "loss": 0.3087, + "step": 595 + }, + { + "epoch": 1.3574660633484164, + "grad_norm": 1.2627407312393188, + "learning_rate": 1.861349224101733e-05, + "loss": 0.2953, + "step": 600 + }, + { + "epoch": 1.3687782805429864, + "grad_norm": 1.451121211051941, + "learning_rate": 1.8431579116821643e-05, + "loss": 0.2795, + "step": 605 + }, + { + "epoch": 1.3800904977375565, + "grad_norm": 1.4404269456863403, + "learning_rate": 1.824913182609099e-05, + "loss": 0.2786, + "step": 610 + }, + { + "epoch": 1.3914027149321266, + "grad_norm": 1.3693130016326904, + "learning_rate": 1.806617876893907e-05, + "loss": 0.2988, + "step": 615 + }, + { + "epoch": 1.4027149321266967, + "grad_norm": 1.2052091360092163, + "learning_rate": 1.7882748424208227e-05, + "loss": 0.2715, + "step": 620 + }, + { + "epoch": 1.4140271493212668, + "grad_norm": 1.25300931930542, + "learning_rate": 1.7698869345036323e-05, + "loss": 0.2776, + "step": 625 + }, + { + "epoch": 1.4253393665158371, + "grad_norm": 1.2121245861053467, + "learning_rate": 1.7514570154412146e-05, + "loss": 0.2842, + "step": 630 + }, + { + "epoch": 1.4366515837104072, + "grad_norm": 1.4474380016326904, + "learning_rate": 1.7329879540719878e-05, + "loss": 0.2725, + "step": 635 + }, + { + "epoch": 1.4479638009049773, + "grad_norm": 1.218845009803772, + "learning_rate": 1.7144826253273405e-05, + "loss": 0.3076, + "step": 640 + }, + { + "epoch": 1.4592760180995474, + "grad_norm": 1.2049261331558228, + "learning_rate": 1.6959439097841134e-05, + "loss": 0.2627, + "step": 645 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 1.1467936038970947, + "learning_rate": 1.6773746932162063e-05, + "loss": 0.2609, + "step": 650 + }, + { + "epoch": 1.4819004524886878, + "grad_norm": 1.1811492443084717, + "learning_rate": 1.6587778661453674e-05, + "loss": 0.2892, + "step": 655 + }, + { + "epoch": 1.493212669683258, + "grad_norm": 1.1899230480194092, + "learning_rate": 1.6401563233912527e-05, + "loss": 0.2253, + "step": 660 + }, + { + "epoch": 1.504524886877828, + "grad_norm": 1.3298557996749878, + "learning_rate": 1.6215129636208106e-05, + "loss": 0.2745, + "step": 665 + }, + { + "epoch": 1.5158371040723981, + "grad_norm": 1.2957813739776611, + "learning_rate": 1.6028506888970708e-05, + "loss": 0.269, + "step": 670 + }, + { + "epoch": 1.5271493212669682, + "grad_norm": 1.1800564527511597, + "learning_rate": 1.584172404227404e-05, + "loss": 0.229, + "step": 675 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 1.2840266227722168, + "learning_rate": 1.5654810171113197e-05, + "loss": 0.2662, + "step": 680 + }, + { + "epoch": 1.5497737556561086, + "grad_norm": 1.224390983581543, + "learning_rate": 1.546779437087881e-05, + "loss": 0.2296, + "step": 685 + }, + { + "epoch": 1.5610859728506787, + "grad_norm": 1.3194774389266968, + "learning_rate": 1.5280705752828e-05, + "loss": 0.2297, + "step": 690 + }, + { + "epoch": 1.5723981900452488, + "grad_norm": 1.1572487354278564, + "learning_rate": 1.5093573439552856e-05, + "loss": 0.2137, + "step": 695 + }, + { + "epoch": 1.5837104072398192, + "grad_norm": 1.2448298931121826, + "learning_rate": 1.4906426560447147e-05, + "loss": 0.263, + "step": 700 + }, + { + "epoch": 1.5950226244343892, + "grad_norm": 1.3129366636276245, + "learning_rate": 1.4719294247172007e-05, + "loss": 0.2331, + "step": 705 + }, + { + "epoch": 1.6063348416289593, + "grad_norm": 1.2388367652893066, + "learning_rate": 1.4532205629121196e-05, + "loss": 0.242, + "step": 710 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 1.316123127937317, + "learning_rate": 1.4345189828886806e-05, + "loss": 0.2419, + "step": 715 + }, + { + "epoch": 1.6289592760180995, + "grad_norm": 1.2912193536758423, + "learning_rate": 1.4158275957725964e-05, + "loss": 0.2277, + "step": 720 + }, + { + "epoch": 1.6402714932126696, + "grad_norm": 1.0356892347335815, + "learning_rate": 1.3971493111029293e-05, + "loss": 0.2307, + "step": 725 + }, + { + "epoch": 1.6515837104072397, + "grad_norm": 1.299077033996582, + "learning_rate": 1.3784870363791903e-05, + "loss": 0.2513, + "step": 730 + }, + { + "epoch": 1.6628959276018098, + "grad_norm": 1.2481838464736938, + "learning_rate": 1.3598436766087479e-05, + "loss": 0.1938, + "step": 735 + }, + { + "epoch": 1.6742081447963801, + "grad_norm": 1.3313149213790894, + "learning_rate": 1.341222133854633e-05, + "loss": 0.2142, + "step": 740 + }, + { + "epoch": 1.6855203619909502, + "grad_norm": 1.1681747436523438, + "learning_rate": 1.322625306783794e-05, + "loss": 0.237, + "step": 745 + }, + { + "epoch": 1.6968325791855203, + "grad_norm": 1.235882043838501, + "learning_rate": 1.3040560902158862e-05, + "loss": 0.1949, + "step": 750 + }, + { + "epoch": 1.7081447963800906, + "grad_norm": 1.2904084920883179, + "learning_rate": 1.2855173746726602e-05, + "loss": 0.2036, + "step": 755 + }, + { + "epoch": 1.7194570135746607, + "grad_norm": 1.3183492422103882, + "learning_rate": 1.2670120459280128e-05, + "loss": 0.2171, + "step": 760 + }, + { + "epoch": 1.7307692307692308, + "grad_norm": 1.4182847738265991, + "learning_rate": 1.2485429845587862e-05, + "loss": 0.2284, + "step": 765 + }, + { + "epoch": 1.742081447963801, + "grad_norm": 1.0953562259674072, + "learning_rate": 1.230113065496368e-05, + "loss": 0.1989, + "step": 770 + }, + { + "epoch": 1.753393665158371, + "grad_norm": 1.4009143114089966, + "learning_rate": 1.2117251575791775e-05, + "loss": 0.2185, + "step": 775 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 1.346669316291809, + "learning_rate": 1.1933821231060932e-05, + "loss": 0.2119, + "step": 780 + }, + { + "epoch": 1.7760180995475112, + "grad_norm": 1.2591272592544556, + "learning_rate": 1.1750868173909014e-05, + "loss": 0.2033, + "step": 785 + }, + { + "epoch": 1.7873303167420813, + "grad_norm": 1.3320597410202026, + "learning_rate": 1.1568420883178363e-05, + "loss": 0.1608, + "step": 790 + }, + { + "epoch": 1.7986425339366516, + "grad_norm": 1.3073947429656982, + "learning_rate": 1.1386507758982672e-05, + "loss": 0.2134, + "step": 795 + }, + { + "epoch": 1.8099547511312217, + "grad_norm": 1.2365235090255737, + "learning_rate": 1.1205157118286203e-05, + "loss": 0.2181, + "step": 800 + }, + { + "epoch": 1.8212669683257918, + "grad_norm": 1.3346391916275024, + "learning_rate": 1.1024397190495915e-05, + "loss": 0.2123, + "step": 805 + }, + { + "epoch": 1.8325791855203621, + "grad_norm": 1.2849234342575073, + "learning_rate": 1.0844256113067177e-05, + "loss": 0.1774, + "step": 810 + }, + { + "epoch": 1.8438914027149322, + "grad_norm": 1.146439790725708, + "learning_rate": 1.0664761927123882e-05, + "loss": 0.1734, + "step": 815 + }, + { + "epoch": 1.8552036199095023, + "grad_norm": 1.1749260425567627, + "learning_rate": 1.0485942573093468e-05, + "loss": 0.1862, + "step": 820 + }, + { + "epoch": 1.8665158371040724, + "grad_norm": 1.058442234992981, + "learning_rate": 1.0307825886357697e-05, + "loss": 0.1621, + "step": 825 + }, + { + "epoch": 1.8778280542986425, + "grad_norm": 1.1935168504714966, + "learning_rate": 1.0130439592919706e-05, + "loss": 0.1605, + "step": 830 + }, + { + "epoch": 1.8891402714932126, + "grad_norm": 1.2611067295074463, + "learning_rate": 9.953811305088142e-06, + "loss": 0.176, + "step": 835 + }, + { + "epoch": 1.9004524886877827, + "grad_norm": 1.0497376918792725, + "learning_rate": 9.777968517178967e-06, + "loss": 0.1525, + "step": 840 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 1.0573829412460327, + "learning_rate": 9.60293860123564e-06, + "loss": 0.1645, + "step": 845 + }, + { + "epoch": 1.9230769230769231, + "grad_norm": 0.9358425140380859, + "learning_rate": 9.428748802768328e-06, + "loss": 0.1728, + "step": 850 + }, + { + "epoch": 1.9343891402714932, + "grad_norm": 1.2219207286834717, + "learning_rate": 9.25542623651281e-06, + "loss": 0.1825, + "step": 855 + }, + { + "epoch": 1.9457013574660633, + "grad_norm": 1.4892945289611816, + "learning_rate": 9.082997882209754e-06, + "loss": 0.1739, + "step": 860 + }, + { + "epoch": 1.9570135746606336, + "grad_norm": 1.2295011281967163, + "learning_rate": 8.911490580404996e-06, + "loss": 0.16, + "step": 865 + }, + { + "epoch": 1.9683257918552037, + "grad_norm": 1.1801732778549194, + "learning_rate": 8.740931028271462e-06, + "loss": 0.1486, + "step": 870 + }, + { + "epoch": 1.9796380090497738, + "grad_norm": 1.1491286754608154, + "learning_rate": 8.571345775453468e-06, + "loss": 0.1586, + "step": 875 + }, + { + "epoch": 1.990950226244344, + "grad_norm": 1.3491284847259521, + "learning_rate": 8.402761219933911e-06, + "loss": 0.1383, + "step": 880 + }, + { + "epoch": 2.002262443438914, + "grad_norm": 1.3663790225982666, + "learning_rate": 8.23520360392515e-06, + "loss": 0.1677, + "step": 885 + }, + { + "epoch": 2.013574660633484, + "grad_norm": 1.308180332183838, + "learning_rate": 8.068699009784057e-06, + "loss": 0.1371, + "step": 890 + }, + { + "epoch": 2.024886877828054, + "grad_norm": 1.0238186120986938, + "learning_rate": 7.90327335595198e-06, + "loss": 0.1362, + "step": 895 + }, + { + "epoch": 2.0361990950226243, + "grad_norm": 0.9665879011154175, + "learning_rate": 7.738952392920262e-06, + "loss": 0.1328, + "step": 900 + }, + { + "epoch": 2.0475113122171944, + "grad_norm": 1.0747697353363037, + "learning_rate": 7.575761699221828e-06, + "loss": 0.1268, + "step": 905 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 1.0043469667434692, + "learning_rate": 7.413726677449603e-06, + "loss": 0.1219, + "step": 910 + }, + { + "epoch": 2.070135746606335, + "grad_norm": 1.0756194591522217, + "learning_rate": 7.252872550302278e-06, + "loss": 0.1253, + "step": 915 + }, + { + "epoch": 2.081447963800905, + "grad_norm": 1.1492919921875, + "learning_rate": 7.093224356658117e-06, + "loss": 0.125, + "step": 920 + }, + { + "epoch": 2.0927601809954752, + "grad_norm": 0.9295089840888977, + "learning_rate": 6.934806947677335e-06, + "loss": 0.1112, + "step": 925 + }, + { + "epoch": 2.1040723981900453, + "grad_norm": 0.9820637702941895, + "learning_rate": 6.7776449829337065e-06, + "loss": 0.1256, + "step": 930 + }, + { + "epoch": 2.1153846153846154, + "grad_norm": 0.8693454265594482, + "learning_rate": 6.621762926576046e-06, + "loss": 0.117, + "step": 935 + }, + { + "epoch": 2.1266968325791855, + "grad_norm": 0.8523662090301514, + "learning_rate": 6.467185043520024e-06, + "loss": 0.124, + "step": 940 + }, + { + "epoch": 2.1380090497737556, + "grad_norm": 1.112913966178894, + "learning_rate": 6.313935395671061e-06, + "loss": 0.1144, + "step": 945 + }, + { + "epoch": 2.1493212669683257, + "grad_norm": 1.380711555480957, + "learning_rate": 6.162037838178821e-06, + "loss": 0.1299, + "step": 950 + }, + { + "epoch": 2.160633484162896, + "grad_norm": 0.9709773659706116, + "learning_rate": 6.01151601572383e-06, + "loss": 0.1071, + "step": 955 + }, + { + "epoch": 2.171945701357466, + "grad_norm": 0.9356366991996765, + "learning_rate": 5.86239335883694e-06, + "loss": 0.1118, + "step": 960 + }, + { + "epoch": 2.183257918552036, + "grad_norm": 0.9257097244262695, + "learning_rate": 5.71469308025205e-06, + "loss": 0.1213, + "step": 965 + }, + { + "epoch": 2.1945701357466065, + "grad_norm": 1.234092116355896, + "learning_rate": 5.568438171292794e-06, + "loss": 0.1045, + "step": 970 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 1.1084325313568115, + "learning_rate": 5.4236513982936396e-06, + "loss": 0.1162, + "step": 975 + }, + { + "epoch": 2.2171945701357467, + "grad_norm": 0.8456999659538269, + "learning_rate": 5.280355299056043e-06, + "loss": 0.1072, + "step": 980 + }, + { + "epoch": 2.228506787330317, + "grad_norm": 1.0667545795440674, + "learning_rate": 5.138572179340193e-06, + "loss": 0.1001, + "step": 985 + }, + { + "epoch": 2.239819004524887, + "grad_norm": 1.0212305784225464, + "learning_rate": 4.998324109392807e-06, + "loss": 0.125, + "step": 990 + }, + { + "epoch": 2.251131221719457, + "grad_norm": 1.039292335510254, + "learning_rate": 4.859632920511675e-06, + "loss": 0.1026, + "step": 995 + }, + { + "epoch": 2.262443438914027, + "grad_norm": 0.9964610934257507, + "learning_rate": 4.7225202016473195e-06, + "loss": 0.1118, + "step": 1000 + }, + { + "epoch": 2.273755656108597, + "grad_norm": 0.9245526790618896, + "learning_rate": 4.587007296042448e-06, + "loss": 0.1085, + "step": 1005 + }, + { + "epoch": 2.2850678733031673, + "grad_norm": 1.2325752973556519, + "learning_rate": 4.453115297909595e-06, + "loss": 0.0984, + "step": 1010 + }, + { + "epoch": 2.2963800904977374, + "grad_norm": 1.0501067638397217, + "learning_rate": 4.320865049147563e-06, + "loss": 0.1151, + "step": 1015 + }, + { + "epoch": 2.3076923076923075, + "grad_norm": 0.96894371509552, + "learning_rate": 4.190277136097146e-06, + "loss": 0.0909, + "step": 1020 + }, + { + "epoch": 2.3190045248868776, + "grad_norm": 0.8868731260299683, + "learning_rate": 4.061371886336584e-06, + "loss": 0.1025, + "step": 1025 + }, + { + "epoch": 2.330316742081448, + "grad_norm": 1.039445161819458, + "learning_rate": 3.93416936551737e-06, + "loss": 0.1172, + "step": 1030 + }, + { + "epoch": 2.341628959276018, + "grad_norm": 0.965347170829773, + "learning_rate": 3.808689374240769e-06, + "loss": 0.1175, + "step": 1035 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.8280678391456604, + "learning_rate": 3.684951444975608e-06, + "loss": 0.1005, + "step": 1040 + }, + { + "epoch": 2.3642533936651584, + "grad_norm": 0.6611517071723938, + "learning_rate": 3.5629748390178295e-06, + "loss": 0.1047, + "step": 1045 + }, + { + "epoch": 2.3755656108597285, + "grad_norm": 1.0721936225891113, + "learning_rate": 3.442778543492227e-06, + "loss": 0.0981, + "step": 1050 + }, + { + "epoch": 2.3868778280542986, + "grad_norm": 1.035072684288025, + "learning_rate": 3.324381268396896e-06, + "loss": 0.1063, + "step": 1055 + }, + { + "epoch": 2.3981900452488687, + "grad_norm": 1.2653448581695557, + "learning_rate": 3.2078014436907556e-06, + "loss": 0.1187, + "step": 1060 + }, + { + "epoch": 2.409502262443439, + "grad_norm": 0.7094863653182983, + "learning_rate": 3.0930572164247408e-06, + "loss": 0.0886, + "step": 1065 + }, + { + "epoch": 2.420814479638009, + "grad_norm": 1.0550916194915771, + "learning_rate": 2.9801664479169845e-06, + "loss": 0.089, + "step": 1070 + }, + { + "epoch": 2.4321266968325794, + "grad_norm": 0.7732695937156677, + "learning_rate": 2.8691467109724777e-06, + "loss": 0.0877, + "step": 1075 + }, + { + "epoch": 2.4434389140271495, + "grad_norm": 0.9020242691040039, + "learning_rate": 2.760015287147662e-06, + "loss": 0.0969, + "step": 1080 + }, + { + "epoch": 2.4547511312217196, + "grad_norm": 0.9925400614738464, + "learning_rate": 2.652789164060346e-06, + "loss": 0.1179, + "step": 1085 + }, + { + "epoch": 2.4660633484162897, + "grad_norm": 1.0866385698318481, + "learning_rate": 2.5474850327453785e-06, + "loss": 0.1258, + "step": 1090 + }, + { + "epoch": 2.47737556561086, + "grad_norm": 0.7385374903678894, + "learning_rate": 2.4441192850564962e-06, + "loss": 0.1044, + "step": 1095 + }, + { + "epoch": 2.48868778280543, + "grad_norm": 0.7336291074752808, + "learning_rate": 2.342708011114708e-06, + "loss": 0.0967, + "step": 1100 + }, + { + "epoch": 2.5, + "grad_norm": 0.8333237767219543, + "learning_rate": 2.243266996803712e-06, + "loss": 0.087, + "step": 1105 + }, + { + "epoch": 2.51131221719457, + "grad_norm": 1.1380997896194458, + "learning_rate": 2.1458117213126012e-06, + "loss": 0.089, + "step": 1110 + }, + { + "epoch": 2.52262443438914, + "grad_norm": 0.8294705152511597, + "learning_rate": 2.0503573547263528e-06, + "loss": 0.0947, + "step": 1115 + }, + { + "epoch": 2.5339366515837103, + "grad_norm": 0.9763185977935791, + "learning_rate": 1.9569187556644336e-06, + "loss": 0.0994, + "step": 1120 + }, + { + "epoch": 2.5452488687782804, + "grad_norm": 0.7412990927696228, + "learning_rate": 1.8655104689678555e-06, + "loss": 0.0856, + "step": 1125 + }, + { + "epoch": 2.5565610859728505, + "grad_norm": 0.8601937890052795, + "learning_rate": 1.7761467234351191e-06, + "loss": 0.1018, + "step": 1130 + }, + { + "epoch": 2.5678733031674206, + "grad_norm": 0.7068769335746765, + "learning_rate": 1.6888414296073058e-06, + "loss": 0.0918, + "step": 1135 + }, + { + "epoch": 2.579185520361991, + "grad_norm": 0.7646127343177795, + "learning_rate": 1.6036081776027623e-06, + "loss": 0.086, + "step": 1140 + }, + { + "epoch": 2.590497737556561, + "grad_norm": 0.8778402209281921, + "learning_rate": 1.52046023500161e-06, + "loss": 0.0817, + "step": 1145 + }, + { + "epoch": 2.6018099547511313, + "grad_norm": 0.7813743948936462, + "learning_rate": 1.4394105447804994e-06, + "loss": 0.0786, + "step": 1150 + }, + { + "epoch": 2.6131221719457014, + "grad_norm": 0.7970955967903137, + "learning_rate": 1.360471723297882e-06, + "loss": 0.0875, + "step": 1155 + }, + { + "epoch": 2.6244343891402715, + "grad_norm": 0.7120739817619324, + "learning_rate": 1.2836560583301139e-06, + "loss": 0.0781, + "step": 1160 + }, + { + "epoch": 2.6357466063348416, + "grad_norm": 1.2211302518844604, + "learning_rate": 1.20897550715873e-06, + "loss": 0.1089, + "step": 1165 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 1.1161458492279053, + "learning_rate": 1.1364416947091244e-06, + "loss": 0.1003, + "step": 1170 + }, + { + "epoch": 2.658371040723982, + "grad_norm": 0.6640447378158569, + "learning_rate": 1.066065911741021e-06, + "loss": 0.0898, + "step": 1175 + }, + { + "epoch": 2.669683257918552, + "grad_norm": 0.6894675493240356, + "learning_rate": 9.978591130909142e-07, + "loss": 0.0914, + "step": 1180 + }, + { + "epoch": 2.6809954751131224, + "grad_norm": 0.7537389397621155, + "learning_rate": 9.318319159668137e-07, + "loss": 0.0789, + "step": 1185 + }, + { + "epoch": 2.6923076923076925, + "grad_norm": 0.7062424421310425, + "learning_rate": 8.679945982955589e-07, + "loss": 0.086, + "step": 1190 + }, + { + "epoch": 2.7036199095022626, + "grad_norm": 0.7462336421012878, + "learning_rate": 8.063570971229245e-07, + "loss": 0.0753, + "step": 1195 + }, + { + "epoch": 2.7149321266968327, + "grad_norm": 0.9509839415550232, + "learning_rate": 7.469290070668189e-07, + "loss": 0.0931, + "step": 1200 + }, + { + "epoch": 2.726244343891403, + "grad_norm": 0.6872794032096863, + "learning_rate": 6.897195788237442e-07, + "loss": 0.0974, + "step": 1205 + }, + { + "epoch": 2.737556561085973, + "grad_norm": 0.5742108225822449, + "learning_rate": 6.347377177288283e-07, + "loss": 0.0895, + "step": 1210 + }, + { + "epoch": 2.748868778280543, + "grad_norm": 0.7682706117630005, + "learning_rate": 5.819919823695996e-07, + "loss": 0.0889, + "step": 1215 + }, + { + "epoch": 2.760180995475113, + "grad_norm": 0.769767701625824, + "learning_rate": 5.31490583253737e-07, + "loss": 0.092, + "step": 1220 + }, + { + "epoch": 2.771493212669683, + "grad_norm": 0.8331628441810608, + "learning_rate": 4.832413815310083e-07, + "loss": 0.1019, + "step": 1225 + }, + { + "epoch": 2.7828054298642533, + "grad_norm": 0.7351166605949402, + "learning_rate": 4.3725188776958247e-07, + "loss": 0.0922, + "step": 1230 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.8393566012382507, + "learning_rate": 3.935292607869334e-07, + "loss": 0.1036, + "step": 1235 + }, + { + "epoch": 2.8054298642533935, + "grad_norm": 0.679094672203064, + "learning_rate": 3.520803065354694e-07, + "loss": 0.0831, + "step": 1240 + }, + { + "epoch": 2.8167420814479636, + "grad_norm": 0.6812605857849121, + "learning_rate": 3.129114770431074e-07, + "loss": 0.0847, + "step": 1245 + }, + { + "epoch": 2.8280542986425337, + "grad_norm": 0.6340612173080444, + "learning_rate": 2.7602886940894633e-07, + "loss": 0.1003, + "step": 1250 + }, + { + "epoch": 2.839366515837104, + "grad_norm": 0.7826530933380127, + "learning_rate": 2.41438224854168e-07, + "loss": 0.0868, + "step": 1255 + }, + { + "epoch": 2.8506787330316743, + "grad_norm": 0.7884237766265869, + "learning_rate": 2.0914492782835194e-07, + "loss": 0.0981, + "step": 1260 + }, + { + "epoch": 2.8619909502262444, + "grad_norm": 0.7355101704597473, + "learning_rate": 1.791540051713325e-07, + "loss": 0.0755, + "step": 1265 + }, + { + "epoch": 2.8733031674208145, + "grad_norm": 0.8331523537635803, + "learning_rate": 1.514701253306866e-07, + "loss": 0.0782, + "step": 1270 + }, + { + "epoch": 2.8846153846153846, + "grad_norm": 0.7330771684646606, + "learning_rate": 1.260975976350598e-07, + "loss": 0.0991, + "step": 1275 + }, + { + "epoch": 2.8959276018099547, + "grad_norm": 0.6602389812469482, + "learning_rate": 1.0304037162334467e-07, + "loss": 0.0947, + "step": 1280 + }, + { + "epoch": 2.9072398190045248, + "grad_norm": 0.6682478189468384, + "learning_rate": 8.23020364299093e-08, + "loss": 0.0856, + "step": 1285 + }, + { + "epoch": 2.918552036199095, + "grad_norm": 0.800484299659729, + "learning_rate": 6.388582022588241e-08, + "loss": 0.0977, + "step": 1290 + }, + { + "epoch": 2.9298642533936654, + "grad_norm": 0.7504603862762451, + "learning_rate": 4.779458971667205e-08, + "loss": 0.0956, + "step": 1295 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.7416691780090332, + "learning_rate": 3.4030849695710905e-08, + "loss": 0.085, + "step": 1300 + }, + { + "epoch": 2.9524886877828056, + "grad_norm": 0.8328441977500916, + "learning_rate": 2.2596742654564795e-08, + "loss": 0.0984, + "step": 1305 + }, + { + "epoch": 2.9638009049773757, + "grad_norm": 0.8047532439231873, + "learning_rate": 1.3494048449426145e-08, + "loss": 0.0921, + "step": 1310 + }, + { + "epoch": 2.975113122171946, + "grad_norm": 0.9050817489624023, + "learning_rate": 6.724184024057279e-09, + "loss": 0.0883, + "step": 1315 + }, + { + "epoch": 2.986425339366516, + "grad_norm": 0.8315650224685669, + "learning_rate": 2.28820318922518e-09, + "loss": 0.1066, + "step": 1320 + }, + { + "epoch": 2.997737556561086, + "grad_norm": 1.117981195449829, + "learning_rate": 1.8679645866437335e-10, + "loss": 0.091, + "step": 1325 + }, + { + "epoch": 3.0, + "step": 1326, + "total_flos": 1.6798389506943222e+18, + "train_loss": 0.437512752876742, + "train_runtime": 720.9786, + "train_samples_per_second": 58.828, + "train_steps_per_second": 1.839 + } + ], + "logging_steps": 5, + "max_steps": 1326, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6798389506943222e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c896b687533bf578f54a4d7036f95cb9880bf57d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/17_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78924da7846cf06bc52ff740558e85421155e6bfbe74b33ede8d274fe2e3f93 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7d3cc0db6bdde8f9ab158ea1877e84fc5d30be2c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 18_128_e3_3e-5 + results: [] +--- + + + +# 18_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2129b07a665203fc7641dd034da33c6619ba4af3 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "o_proj", + "q_proj", + "k_proj", + "gate_proj", + "v_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee12355a5240e73b6467f360e4bbd343ce937521 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd0ceda0c5cc48e79db360d0718379e8c8081c08b5779930847d2fb40b4849c +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f579253b19ed9c69c589ce8ad74c359728e4fb8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.0513325619528335e+18, + "train_loss": 0.4135979340288397, + "train_runtime": 456.5386, + "train_samples": 9341, + "train_samples_per_second": 61.381, + "train_steps_per_second": 1.919 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f579253b19ed9c69c589ce8ad74c359728e4fb8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.0513325619528335e+18, + "train_loss": 0.4135979340288397, + "train_runtime": 456.5386, + "train_samples": 9341, + "train_samples_per_second": 61.381, + "train_steps_per_second": 1.919 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..86e9633624f831fadc795444396822bb8fedc7f1 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1268 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 876, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.017123287671232876, + "grad_norm": 0.7642919421195984, + "learning_rate": 2.7272727272727272e-06, + "loss": 1.5961, + "step": 5 + }, + { + "epoch": 0.03424657534246575, + "grad_norm": 0.7347398996353149, + "learning_rate": 6.136363636363637e-06, + "loss": 1.6679, + "step": 10 + }, + { + "epoch": 0.05136986301369863, + "grad_norm": 0.6066606044769287, + "learning_rate": 9.545454545454545e-06, + "loss": 1.6103, + "step": 15 + }, + { + "epoch": 0.0684931506849315, + "grad_norm": 0.5975635051727295, + "learning_rate": 1.2954545454545455e-05, + "loss": 1.5383, + "step": 20 + }, + { + "epoch": 0.08561643835616438, + "grad_norm": 0.4612351357936859, + "learning_rate": 1.6363636363636363e-05, + "loss": 1.5241, + "step": 25 + }, + { + "epoch": 0.10273972602739725, + "grad_norm": 0.4845544993877411, + "learning_rate": 1.9772727272727274e-05, + "loss": 1.4496, + "step": 30 + }, + { + "epoch": 0.11986301369863013, + "grad_norm": 0.48729759454727173, + "learning_rate": 2.318181818181818e-05, + "loss": 1.5188, + "step": 35 + }, + { + "epoch": 0.136986301369863, + "grad_norm": 0.46095529198646545, + "learning_rate": 2.6590909090909093e-05, + "loss": 1.4361, + "step": 40 + }, + { + "epoch": 0.1541095890410959, + "grad_norm": 0.5242185592651367, + "learning_rate": 3e-05, + "loss": 1.4595, + "step": 45 + }, + { + "epoch": 0.17123287671232876, + "grad_norm": 0.5748652219772339, + "learning_rate": 2.999732673837156e-05, + "loss": 1.2761, + "step": 50 + }, + { + "epoch": 0.18835616438356165, + "grad_norm": 0.5401932001113892, + "learning_rate": 2.9989307906329936e-05, + "loss": 1.2883, + "step": 55 + }, + { + "epoch": 0.2054794520547945, + "grad_norm": 0.6261909008026123, + "learning_rate": 2.9975946362066596e-05, + "loss": 1.3181, + "step": 60 + }, + { + "epoch": 0.2226027397260274, + "grad_norm": 0.6721273064613342, + "learning_rate": 2.995724686810202e-05, + "loss": 1.3041, + "step": 65 + }, + { + "epoch": 0.23972602739726026, + "grad_norm": 0.6685576438903809, + "learning_rate": 2.9933216089588158e-05, + "loss": 1.2394, + "step": 70 + }, + { + "epoch": 0.2568493150684932, + "grad_norm": 0.6345541477203369, + "learning_rate": 2.9903862591932762e-05, + "loss": 1.2424, + "step": 75 + }, + { + "epoch": 0.273972602739726, + "grad_norm": 0.7192750573158264, + "learning_rate": 2.986919683774636e-05, + "loss": 1.2184, + "step": 80 + }, + { + "epoch": 0.2910958904109589, + "grad_norm": 0.6634566783905029, + "learning_rate": 2.9829231183113013e-05, + "loss": 1.1373, + "step": 85 + }, + { + "epoch": 0.3082191780821918, + "grad_norm": 0.715645968914032, + "learning_rate": 2.9783979873186188e-05, + "loss": 1.1129, + "step": 90 + }, + { + "epoch": 0.3253424657534247, + "grad_norm": 0.6668285727500916, + "learning_rate": 2.973345903711128e-05, + "loss": 1.1197, + "step": 95 + }, + { + "epoch": 0.3424657534246575, + "grad_norm": 0.8762641549110413, + "learning_rate": 2.9677686682276623e-05, + "loss": 1.0909, + "step": 100 + }, + { + "epoch": 0.3595890410958904, + "grad_norm": 0.8008274435997009, + "learning_rate": 2.9616682687895038e-05, + "loss": 1.0349, + "step": 105 + }, + { + "epoch": 0.3767123287671233, + "grad_norm": 0.8878743052482605, + "learning_rate": 2.9550468797918162e-05, + "loss": 0.9566, + "step": 110 + }, + { + "epoch": 0.3938356164383562, + "grad_norm": 0.8322305083274841, + "learning_rate": 2.947906861328618e-05, + "loss": 0.9428, + "step": 115 + }, + { + "epoch": 0.410958904109589, + "grad_norm": 0.8815182447433472, + "learning_rate": 2.9402507583515604e-05, + "loss": 0.9491, + "step": 120 + }, + { + "epoch": 0.4280821917808219, + "grad_norm": 1.0223654508590698, + "learning_rate": 2.9320812997628184e-05, + "loss": 0.9124, + "step": 125 + }, + { + "epoch": 0.4452054794520548, + "grad_norm": 0.9121063947677612, + "learning_rate": 2.923401397442415e-05, + "loss": 0.872, + "step": 130 + }, + { + "epoch": 0.4623287671232877, + "grad_norm": 0.8788174986839294, + "learning_rate": 2.914214145210324e-05, + "loss": 0.8948, + "step": 135 + }, + { + "epoch": 0.4794520547945205, + "grad_norm": 1.1877964735031128, + "learning_rate": 2.9045228177237285e-05, + "loss": 0.8146, + "step": 140 + }, + { + "epoch": 0.4965753424657534, + "grad_norm": 0.9424985647201538, + "learning_rate": 2.894330869309814e-05, + "loss": 0.8579, + "step": 145 + }, + { + "epoch": 0.5136986301369864, + "grad_norm": 0.9724799990653992, + "learning_rate": 2.8836419327345297e-05, + "loss": 0.811, + "step": 150 + }, + { + "epoch": 0.5308219178082192, + "grad_norm": 0.9572834372520447, + "learning_rate": 2.8724598179077413e-05, + "loss": 0.8231, + "step": 155 + }, + { + "epoch": 0.547945205479452, + "grad_norm": 1.0806987285614014, + "learning_rate": 2.8607885105252473e-05, + "loss": 0.7648, + "step": 160 + }, + { + "epoch": 0.565068493150685, + "grad_norm": 0.9616779685020447, + "learning_rate": 2.848632170648139e-05, + "loss": 0.7319, + "step": 165 + }, + { + "epoch": 0.5821917808219178, + "grad_norm": 1.032207727432251, + "learning_rate": 2.8359951312200077e-05, + "loss": 0.7198, + "step": 170 + }, + { + "epoch": 0.5993150684931506, + "grad_norm": 1.0158820152282715, + "learning_rate": 2.8228818965225325e-05, + "loss": 0.7291, + "step": 175 + }, + { + "epoch": 0.6164383561643836, + "grad_norm": 1.1943464279174805, + "learning_rate": 2.8092971405700004e-05, + "loss": 0.7066, + "step": 180 + }, + { + "epoch": 0.6335616438356164, + "grad_norm": 1.2847884893417358, + "learning_rate": 2.7952457054433193e-05, + "loss": 0.7008, + "step": 185 + }, + { + "epoch": 0.6506849315068494, + "grad_norm": 1.1696120500564575, + "learning_rate": 2.780732599564137e-05, + "loss": 0.6741, + "step": 190 + }, + { + "epoch": 0.6678082191780822, + "grad_norm": 1.2236956357955933, + "learning_rate": 2.76576299590966e-05, + "loss": 0.66, + "step": 195 + }, + { + "epoch": 0.684931506849315, + "grad_norm": 1.2044501304626465, + "learning_rate": 2.7503422301688276e-05, + "loss": 0.6677, + "step": 200 + }, + { + "epoch": 0.702054794520548, + "grad_norm": 1.0200148820877075, + "learning_rate": 2.7344757988404845e-05, + "loss": 0.6071, + "step": 205 + }, + { + "epoch": 0.7191780821917808, + "grad_norm": 1.2887526750564575, + "learning_rate": 2.718169357274238e-05, + "loss": 0.5903, + "step": 210 + }, + { + "epoch": 0.7363013698630136, + "grad_norm": 1.1748912334442139, + "learning_rate": 2.7014287176546922e-05, + "loss": 0.5874, + "step": 215 + }, + { + "epoch": 0.7534246575342466, + "grad_norm": 1.3043445348739624, + "learning_rate": 2.6842598469297846e-05, + "loss": 0.6147, + "step": 220 + }, + { + "epoch": 0.7705479452054794, + "grad_norm": 1.131698489189148, + "learning_rate": 2.6666688646839574e-05, + "loss": 0.5963, + "step": 225 + }, + { + "epoch": 0.7876712328767124, + "grad_norm": 1.1456456184387207, + "learning_rate": 2.6486620409569222e-05, + "loss": 0.5571, + "step": 230 + }, + { + "epoch": 0.8047945205479452, + "grad_norm": 1.1147398948669434, + "learning_rate": 2.6302457940088024e-05, + "loss": 0.5521, + "step": 235 + }, + { + "epoch": 0.821917808219178, + "grad_norm": 1.1522595882415771, + "learning_rate": 2.611426688032439e-05, + "loss": 0.5644, + "step": 240 + }, + { + "epoch": 0.839041095890411, + "grad_norm": 1.292273998260498, + "learning_rate": 2.5922114308136826e-05, + "loss": 0.5857, + "step": 245 + }, + { + "epoch": 0.8561643835616438, + "grad_norm": 1.3559929132461548, + "learning_rate": 2.5726068713405084e-05, + "loss": 0.4856, + "step": 250 + }, + { + "epoch": 0.8732876712328768, + "grad_norm": 1.4571064710617065, + "learning_rate": 2.5526199973617932e-05, + "loss": 0.5034, + "step": 255 + }, + { + "epoch": 0.8904109589041096, + "grad_norm": 1.1525589227676392, + "learning_rate": 2.532257932896641e-05, + "loss": 0.5133, + "step": 260 + }, + { + "epoch": 0.9075342465753424, + "grad_norm": 1.215793251991272, + "learning_rate": 2.511527935695133e-05, + "loss": 0.4811, + "step": 265 + }, + { + "epoch": 0.9246575342465754, + "grad_norm": 1.1755338907241821, + "learning_rate": 2.4904373946514136e-05, + "loss": 0.4932, + "step": 270 + }, + { + "epoch": 0.9417808219178082, + "grad_norm": 1.1191002130508423, + "learning_rate": 2.468993827170028e-05, + "loss": 0.4327, + "step": 275 + }, + { + "epoch": 0.958904109589041, + "grad_norm": 1.2169800996780396, + "learning_rate": 2.4472048764864602e-05, + "loss": 0.4439, + "step": 280 + }, + { + "epoch": 0.976027397260274, + "grad_norm": 1.2438684701919556, + "learning_rate": 2.425078308942815e-05, + "loss": 0.4812, + "step": 285 + }, + { + "epoch": 0.9931506849315068, + "grad_norm": 1.2049016952514648, + "learning_rate": 2.402622011219622e-05, + "loss": 0.4717, + "step": 290 + }, + { + "epoch": 1.0102739726027397, + "grad_norm": 1.18350350856781, + "learning_rate": 2.379843987524753e-05, + "loss": 0.3435, + "step": 295 + }, + { + "epoch": 1.0273972602739727, + "grad_norm": 1.2939919233322144, + "learning_rate": 2.3567523567404346e-05, + "loss": 0.3938, + "step": 300 + }, + { + "epoch": 1.0445205479452055, + "grad_norm": 1.448604702949524, + "learning_rate": 2.3333553495294033e-05, + "loss": 0.4205, + "step": 305 + }, + { + "epoch": 1.0616438356164384, + "grad_norm": 1.2284152507781982, + "learning_rate": 2.309661305401205e-05, + "loss": 0.4026, + "step": 310 + }, + { + "epoch": 1.0787671232876712, + "grad_norm": 1.4402669668197632, + "learning_rate": 2.285678669739705e-05, + "loss": 0.3926, + "step": 315 + }, + { + "epoch": 1.095890410958904, + "grad_norm": 1.2584463357925415, + "learning_rate": 2.2614159907928588e-05, + "loss": 0.3379, + "step": 320 + }, + { + "epoch": 1.1130136986301369, + "grad_norm": 1.2806655168533325, + "learning_rate": 2.236881916625816e-05, + "loss": 0.351, + "step": 325 + }, + { + "epoch": 1.13013698630137, + "grad_norm": 1.1817296743392944, + "learning_rate": 2.212085192038453e-05, + "loss": 0.3289, + "step": 330 + }, + { + "epoch": 1.1472602739726028, + "grad_norm": 1.2045788764953613, + "learning_rate": 2.1870346554484154e-05, + "loss": 0.3344, + "step": 335 + }, + { + "epoch": 1.1643835616438356, + "grad_norm": 1.1659690141677856, + "learning_rate": 2.161739235740802e-05, + "loss": 0.3564, + "step": 340 + }, + { + "epoch": 1.1815068493150684, + "grad_norm": 1.4107425212860107, + "learning_rate": 2.1362079490855968e-05, + "loss": 0.3031, + "step": 345 + }, + { + "epoch": 1.1986301369863013, + "grad_norm": 1.4631099700927734, + "learning_rate": 2.110449895723991e-05, + "loss": 0.3766, + "step": 350 + }, + { + "epoch": 1.2157534246575343, + "grad_norm": 1.3707678318023682, + "learning_rate": 2.084474256724743e-05, + "loss": 0.346, + "step": 355 + }, + { + "epoch": 1.2328767123287672, + "grad_norm": 1.1106836795806885, + "learning_rate": 2.0582902907117193e-05, + "loss": 0.2948, + "step": 360 + }, + { + "epoch": 1.25, + "grad_norm": 1.1597270965576172, + "learning_rate": 2.0319073305638035e-05, + "loss": 0.2705, + "step": 365 + }, + { + "epoch": 1.2671232876712328, + "grad_norm": 1.1617885828018188, + "learning_rate": 2.00533478008833e-05, + "loss": 0.2978, + "step": 370 + }, + { + "epoch": 1.2842465753424657, + "grad_norm": 1.2699962854385376, + "learning_rate": 1.97858211066924e-05, + "loss": 0.2694, + "step": 375 + }, + { + "epoch": 1.3013698630136985, + "grad_norm": 1.1255543231964111, + "learning_rate": 1.9516588578911484e-05, + "loss": 0.2547, + "step": 380 + }, + { + "epoch": 1.3184931506849316, + "grad_norm": 1.1736236810684204, + "learning_rate": 1.9245746181405306e-05, + "loss": 0.2633, + "step": 385 + }, + { + "epoch": 1.3356164383561644, + "grad_norm": 1.0935441255569458, + "learning_rate": 1.8973390451852348e-05, + "loss": 0.294, + "step": 390 + }, + { + "epoch": 1.3527397260273972, + "grad_norm": 1.2481800317764282, + "learning_rate": 1.8699618467335428e-05, + "loss": 0.2678, + "step": 395 + }, + { + "epoch": 1.36986301369863, + "grad_norm": 1.2266769409179688, + "learning_rate": 1.8424527809740028e-05, + "loss": 0.2913, + "step": 400 + }, + { + "epoch": 1.3869863013698631, + "grad_norm": 1.2380919456481934, + "learning_rate": 1.8148216530972714e-05, + "loss": 0.2827, + "step": 405 + }, + { + "epoch": 1.404109589041096, + "grad_norm": 1.37958562374115, + "learning_rate": 1.7870783118012034e-05, + "loss": 0.2637, + "step": 410 + }, + { + "epoch": 1.4212328767123288, + "grad_norm": 1.2444887161254883, + "learning_rate": 1.7592326457804295e-05, + "loss": 0.237, + "step": 415 + }, + { + "epoch": 1.4383561643835616, + "grad_norm": 1.3632440567016602, + "learning_rate": 1.7312945802016817e-05, + "loss": 0.2295, + "step": 420 + }, + { + "epoch": 1.4554794520547945, + "grad_norm": 1.2420686483383179, + "learning_rate": 1.7032740731661178e-05, + "loss": 0.2196, + "step": 425 + }, + { + "epoch": 1.4726027397260273, + "grad_norm": 1.3462681770324707, + "learning_rate": 1.675181112159907e-05, + "loss": 0.2636, + "step": 430 + }, + { + "epoch": 1.4897260273972603, + "grad_norm": 1.1691420078277588, + "learning_rate": 1.6470257104943414e-05, + "loss": 0.2298, + "step": 435 + }, + { + "epoch": 1.5068493150684932, + "grad_norm": 1.3942689895629883, + "learning_rate": 1.618817903736741e-05, + "loss": 0.2342, + "step": 440 + }, + { + "epoch": 1.523972602739726, + "grad_norm": 1.3126505613327026, + "learning_rate": 1.5905677461334292e-05, + "loss": 0.228, + "step": 445 + }, + { + "epoch": 1.541095890410959, + "grad_norm": 1.2174842357635498, + "learning_rate": 1.5622853070260492e-05, + "loss": 0.2249, + "step": 450 + }, + { + "epoch": 1.558219178082192, + "grad_norm": 1.3463488817214966, + "learning_rate": 1.5339806672624982e-05, + "loss": 0.2034, + "step": 455 + }, + { + "epoch": 1.5753424657534247, + "grad_norm": 1.412550687789917, + "learning_rate": 1.5056639156037597e-05, + "loss": 0.2147, + "step": 460 + }, + { + "epoch": 1.5924657534246576, + "grad_norm": 1.2034904956817627, + "learning_rate": 1.4773451451279213e-05, + "loss": 0.2315, + "step": 465 + }, + { + "epoch": 1.6095890410958904, + "grad_norm": 1.2897893190383911, + "learning_rate": 1.4490344496326463e-05, + "loss": 0.1946, + "step": 470 + }, + { + "epoch": 1.6267123287671232, + "grad_norm": 1.2564674615859985, + "learning_rate": 1.4207419200373942e-05, + "loss": 0.1773, + "step": 475 + }, + { + "epoch": 1.643835616438356, + "grad_norm": 1.0805988311767578, + "learning_rate": 1.3924776407866634e-05, + "loss": 0.1841, + "step": 480 + }, + { + "epoch": 1.660958904109589, + "grad_norm": 1.2102560997009277, + "learning_rate": 1.3642516862555433e-05, + "loss": 0.196, + "step": 485 + }, + { + "epoch": 1.678082191780822, + "grad_norm": 1.3453584909439087, + "learning_rate": 1.3360741171588578e-05, + "loss": 0.1761, + "step": 490 + }, + { + "epoch": 1.6952054794520548, + "grad_norm": 1.2346432209014893, + "learning_rate": 1.3079549769651737e-05, + "loss": 0.2319, + "step": 495 + }, + { + "epoch": 1.7123287671232876, + "grad_norm": 1.1557613611221313, + "learning_rate": 1.2799042883169576e-05, + "loss": 0.2111, + "step": 500 + }, + { + "epoch": 1.7294520547945207, + "grad_norm": 1.175168514251709, + "learning_rate": 1.2519320494581581e-05, + "loss": 0.1821, + "step": 505 + }, + { + "epoch": 1.7465753424657535, + "grad_norm": 1.3231312036514282, + "learning_rate": 1.2240482306704831e-05, + "loss": 0.1811, + "step": 510 + }, + { + "epoch": 1.7636986301369864, + "grad_norm": 1.00853431224823, + "learning_rate": 1.1962627707196407e-05, + "loss": 0.1838, + "step": 515 + }, + { + "epoch": 1.7808219178082192, + "grad_norm": 1.2362895011901855, + "learning_rate": 1.1685855733128203e-05, + "loss": 0.19, + "step": 520 + }, + { + "epoch": 1.797945205479452, + "grad_norm": 1.1616286039352417, + "learning_rate": 1.1410265035686639e-05, + "loss": 0.1661, + "step": 525 + }, + { + "epoch": 1.8150684931506849, + "grad_norm": 1.2332571744918823, + "learning_rate": 1.1135953845009914e-05, + "loss": 0.1537, + "step": 530 + }, + { + "epoch": 1.8321917808219177, + "grad_norm": 1.173606276512146, + "learning_rate": 1.0863019935175415e-05, + "loss": 0.1661, + "step": 535 + }, + { + "epoch": 1.8493150684931505, + "grad_norm": 1.2598190307617188, + "learning_rate": 1.0591560589349568e-05, + "loss": 0.1651, + "step": 540 + }, + { + "epoch": 1.8664383561643836, + "grad_norm": 1.0256857872009277, + "learning_rate": 1.0321672565112767e-05, + "loss": 0.1702, + "step": 545 + }, + { + "epoch": 1.8835616438356164, + "grad_norm": 1.4540702104568481, + "learning_rate": 1.0053452059971555e-05, + "loss": 0.1452, + "step": 550 + }, + { + "epoch": 1.9006849315068495, + "grad_norm": 1.107009768486023, + "learning_rate": 9.786994677070523e-06, + "loss": 0.1482, + "step": 555 + }, + { + "epoch": 1.9178082191780823, + "grad_norm": 1.1373356580734253, + "learning_rate": 9.52239539111598e-06, + "loss": 0.1577, + "step": 560 + }, + { + "epoch": 1.9349315068493151, + "grad_norm": 1.0492547750473022, + "learning_rate": 9.259748514523654e-06, + "loss": 0.1352, + "step": 565 + }, + { + "epoch": 1.952054794520548, + "grad_norm": 1.2126743793487549, + "learning_rate": 8.999147663802494e-06, + "loss": 0.1422, + "step": 570 + }, + { + "epoch": 1.9691780821917808, + "grad_norm": 1.2040671110153198, + "learning_rate": 8.740685726186445e-06, + "loss": 0.1469, + "step": 575 + }, + { + "epoch": 1.9863013698630136, + "grad_norm": 1.0712188482284546, + "learning_rate": 8.484454826526199e-06, + "loss": 0.1483, + "step": 580 + }, + { + "epoch": 2.0034246575342465, + "grad_norm": 0.9522420167922974, + "learning_rate": 8.2305462944527e-06, + "loss": 0.1459, + "step": 585 + }, + { + "epoch": 2.0205479452054793, + "grad_norm": 0.9716567397117615, + "learning_rate": 7.979050631824074e-06, + "loss": 0.1259, + "step": 590 + }, + { + "epoch": 2.037671232876712, + "grad_norm": 0.9766287207603455, + "learning_rate": 7.730057480467604e-06, + "loss": 0.1155, + "step": 595 + }, + { + "epoch": 2.0547945205479454, + "grad_norm": 0.8308185935020447, + "learning_rate": 7.4836555902282534e-06, + "loss": 0.1135, + "step": 600 + }, + { + "epoch": 2.0719178082191783, + "grad_norm": 0.9477062225341797, + "learning_rate": 7.239932787335147e-06, + "loss": 0.1272, + "step": 605 + }, + { + "epoch": 2.089041095890411, + "grad_norm": 0.8299199342727661, + "learning_rate": 6.9989759430972105e-06, + "loss": 0.101, + "step": 610 + }, + { + "epoch": 2.106164383561644, + "grad_norm": 0.9344136118888855, + "learning_rate": 6.760870942939202e-06, + "loss": 0.1066, + "step": 615 + }, + { + "epoch": 2.1232876712328768, + "grad_norm": 1.676020622253418, + "learning_rate": 6.525702655789201e-06, + "loss": 0.0989, + "step": 620 + }, + { + "epoch": 2.1404109589041096, + "grad_norm": 0.92426598072052, + "learning_rate": 6.293554903828302e-06, + "loss": 0.0948, + "step": 625 + }, + { + "epoch": 2.1575342465753424, + "grad_norm": 1.018420934677124, + "learning_rate": 6.0645104326135e-06, + "loss": 0.0993, + "step": 630 + }, + { + "epoch": 2.1746575342465753, + "grad_norm": 1.3123375177383423, + "learning_rate": 5.8386508815842746e-06, + "loss": 0.1048, + "step": 635 + }, + { + "epoch": 2.191780821917808, + "grad_norm": 0.8747774362564087, + "learning_rate": 5.61605675496345e-06, + "loss": 0.0952, + "step": 640 + }, + { + "epoch": 2.208904109589041, + "grad_norm": 0.959812581539154, + "learning_rate": 5.396807393062681e-06, + "loss": 0.0931, + "step": 645 + }, + { + "epoch": 2.2260273972602738, + "grad_norm": 0.9676283597946167, + "learning_rate": 5.180980944002799e-06, + "loss": 0.1155, + "step": 650 + }, + { + "epoch": 2.243150684931507, + "grad_norm": 1.3411155939102173, + "learning_rate": 4.9686543358590934e-06, + "loss": 0.0971, + "step": 655 + }, + { + "epoch": 2.26027397260274, + "grad_norm": 0.9003651738166809, + "learning_rate": 4.759903249241464e-06, + "loss": 0.0937, + "step": 660 + }, + { + "epoch": 2.2773972602739727, + "grad_norm": 1.3036240339279175, + "learning_rate": 4.554802090319209e-06, + "loss": 0.0978, + "step": 665 + }, + { + "epoch": 2.2945205479452055, + "grad_norm": 0.8570740222930908, + "learning_rate": 4.353423964300074e-06, + "loss": 0.0974, + "step": 670 + }, + { + "epoch": 2.3116438356164384, + "grad_norm": 0.8326997756958008, + "learning_rate": 4.155840649373015e-06, + "loss": 0.1096, + "step": 675 + }, + { + "epoch": 2.328767123287671, + "grad_norm": 0.8096888661384583, + "learning_rate": 3.96212257112391e-06, + "loss": 0.0808, + "step": 680 + }, + { + "epoch": 2.345890410958904, + "grad_norm": 0.8468197584152222, + "learning_rate": 3.772338777433482e-06, + "loss": 0.0948, + "step": 685 + }, + { + "epoch": 2.363013698630137, + "grad_norm": 0.9208524823188782, + "learning_rate": 3.5865569138661814e-06, + "loss": 0.0936, + "step": 690 + }, + { + "epoch": 2.3801369863013697, + "grad_norm": 0.7897376418113708, + "learning_rate": 3.4048431995589453e-06, + "loss": 0.0885, + "step": 695 + }, + { + "epoch": 2.3972602739726026, + "grad_norm": 0.761715292930603, + "learning_rate": 3.22726240361843e-06, + "loss": 0.0914, + "step": 700 + }, + { + "epoch": 2.4143835616438354, + "grad_norm": 0.9275925159454346, + "learning_rate": 3.053877822034995e-06, + "loss": 0.0804, + "step": 705 + }, + { + "epoch": 2.4315068493150687, + "grad_norm": 0.8617101907730103, + "learning_rate": 2.884751255121827e-06, + "loss": 0.0862, + "step": 710 + }, + { + "epoch": 2.4486301369863015, + "grad_norm": 0.6595993638038635, + "learning_rate": 2.7199429854871544e-06, + "loss": 0.0788, + "step": 715 + }, + { + "epoch": 2.4657534246575343, + "grad_norm": 0.7483975291252136, + "learning_rate": 2.559511756547407e-06, + "loss": 0.0842, + "step": 720 + }, + { + "epoch": 2.482876712328767, + "grad_norm": 1.092637538909912, + "learning_rate": 2.403514751589032e-06, + "loss": 0.0892, + "step": 725 + }, + { + "epoch": 2.5, + "grad_norm": 0.9056339859962463, + "learning_rate": 2.252007573386365e-06, + "loss": 0.098, + "step": 730 + }, + { + "epoch": 2.517123287671233, + "grad_norm": 0.7192738056182861, + "learning_rate": 2.105044224382854e-06, + "loss": 0.0885, + "step": 735 + }, + { + "epoch": 2.5342465753424657, + "grad_norm": 0.8372335433959961, + "learning_rate": 1.9626770874427368e-06, + "loss": 0.0876, + "step": 740 + }, + { + "epoch": 2.5513698630136985, + "grad_norm": 0.7301207780838013, + "learning_rate": 1.8249569071799134e-06, + "loss": 0.0899, + "step": 745 + }, + { + "epoch": 2.5684931506849313, + "grad_norm": 0.8143348097801208, + "learning_rate": 1.69193277187083e-06, + "loss": 0.0762, + "step": 750 + }, + { + "epoch": 2.5856164383561646, + "grad_norm": 0.8249316811561584, + "learning_rate": 1.5636520959577094e-06, + "loss": 0.094, + "step": 755 + }, + { + "epoch": 2.602739726027397, + "grad_norm": 0.6912837624549866, + "learning_rate": 1.44016060314835e-06, + "loss": 0.072, + "step": 760 + }, + { + "epoch": 2.6198630136986303, + "grad_norm": 0.7001112699508667, + "learning_rate": 1.321502310118649e-06, + "loss": 0.0759, + "step": 765 + }, + { + "epoch": 2.636986301369863, + "grad_norm": 0.824414074420929, + "learning_rate": 1.2077195108234934e-06, + "loss": 0.085, + "step": 770 + }, + { + "epoch": 2.654109589041096, + "grad_norm": 0.8162326216697693, + "learning_rate": 1.098852761421719e-06, + "loss": 0.0803, + "step": 775 + }, + { + "epoch": 2.671232876712329, + "grad_norm": 0.6646018624305725, + "learning_rate": 9.949408658205072e-07, + "loss": 0.0758, + "step": 780 + }, + { + "epoch": 2.6883561643835616, + "grad_norm": 0.6834368109703064, + "learning_rate": 8.960208618442883e-07, + "loss": 0.0717, + "step": 785 + }, + { + "epoch": 2.7054794520547945, + "grad_norm": 0.8140629529953003, + "learning_rate": 8.021280080331816e-07, + "loss": 0.0914, + "step": 790 + }, + { + "epoch": 2.7226027397260273, + "grad_norm": 0.6512930393218994, + "learning_rate": 7.132957710756277e-07, + "loss": 0.0878, + "step": 795 + }, + { + "epoch": 2.73972602739726, + "grad_norm": 0.6765428185462952, + "learning_rate": 6.295558138796803e-07, + "loss": 0.0654, + "step": 800 + }, + { + "epoch": 2.756849315068493, + "grad_norm": 0.775492250919342, + "learning_rate": 5.509379842872558e-07, + "loss": 0.0775, + "step": 805 + }, + { + "epoch": 2.7739726027397262, + "grad_norm": 0.6667018532752991, + "learning_rate": 4.774703044353051e-07, + "loss": 0.069, + "step": 810 + }, + { + "epoch": 2.791095890410959, + "grad_norm": 0.6476848721504211, + "learning_rate": 4.091789607677582e-07, + "loss": 0.0774, + "step": 815 + }, + { + "epoch": 2.808219178082192, + "grad_norm": 0.8198016881942749, + "learning_rate": 3.460882947017635e-07, + "loss": 0.0817, + "step": 820 + }, + { + "epoch": 2.8253424657534247, + "grad_norm": 0.7266663312911987, + "learning_rate": 2.8822079395154357e-07, + "loss": 0.0693, + "step": 825 + }, + { + "epoch": 2.8424657534246576, + "grad_norm": 0.6383249759674072, + "learning_rate": 2.3559708451300622e-07, + "loss": 0.0738, + "step": 830 + }, + { + "epoch": 2.8595890410958904, + "grad_norm": 0.6552285552024841, + "learning_rate": 1.8823592331191242e-07, + "loss": 0.0809, + "step": 835 + }, + { + "epoch": 2.8767123287671232, + "grad_norm": 0.5937789082527161, + "learning_rate": 1.4615419151824406e-07, + "loss": 0.0695, + "step": 840 + }, + { + "epoch": 2.893835616438356, + "grad_norm": 0.8212797045707703, + "learning_rate": 1.0936688852919042e-07, + "loss": 0.0837, + "step": 845 + }, + { + "epoch": 2.910958904109589, + "grad_norm": 0.8434480428695679, + "learning_rate": 7.788712662281317e-08, + "loss": 0.0838, + "step": 850 + }, + { + "epoch": 2.928082191780822, + "grad_norm": 0.691318690776825, + "learning_rate": 5.1726126284389886e-08, + "loss": 0.0829, + "step": 855 + }, + { + "epoch": 2.9452054794520546, + "grad_norm": 0.6233073472976685, + "learning_rate": 3.0893212207036556e-08, + "loss": 0.0824, + "step": 860 + }, + { + "epoch": 2.962328767123288, + "grad_norm": 0.7444546222686768, + "learning_rate": 1.5395809968061226e-08, + "loss": 0.0826, + "step": 865 + }, + { + "epoch": 2.9794520547945207, + "grad_norm": 0.7764705419540405, + "learning_rate": 5.239443382229481e-09, + "loss": 0.0731, + "step": 870 + }, + { + "epoch": 2.9965753424657535, + "grad_norm": 0.6682039499282837, + "learning_rate": 4.277325328860826e-10, + "loss": 0.0856, + "step": 875 + }, + { + "epoch": 3.0, + "step": 876, + "total_flos": 1.0513325619528335e+18, + "train_loss": 0.4135979340288397, + "train_runtime": 456.5386, + "train_samples_per_second": 61.381, + "train_steps_per_second": 1.919 + } + ], + "logging_steps": 5, + "max_steps": 876, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0513325619528335e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..93d401a9e14717242e911c2f22900d022ea63183 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/18_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eac3dff894f470a77991dbb4102f3b587a2a99d5a4e10829d5d21af4e7e4f3c +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96d101d32b560433f0624c25a5805fd848a3530d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 19_128_e3_3e-5 + results: [] +--- + + + +# 19_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..51ea6a378ff748a2f3d6fe4c969cd2eaefb0e516 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "q_proj", + "o_proj", + "gate_proj", + "k_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10c39f4a8207e5452e3286e06c7557dc69b4db6c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b066c9310edcd2613b07da4650d0efffcc1d6014cf28744bfb9a124272dbaa +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1b0c9a4158eee39ea4afa3acf07262d08700954d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4339270698462085e+18, + "train_loss": 0.3988168353994265, + "train_runtime": 612.9862, + "train_samples": 12032, + "train_samples_per_second": 58.886, + "train_steps_per_second": 1.84 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1b0c9a4158eee39ea4afa3acf07262d08700954d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.4339270698462085e+18, + "train_loss": 0.3988168353994265, + "train_runtime": 612.9862, + "train_samples": 12032, + "train_samples_per_second": 58.886, + "train_steps_per_second": 1.84 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5cad5c1569398e3be916837bda518a7286d1947a --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1618 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1128, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013297872340425532, + "grad_norm": 0.608323335647583, + "learning_rate": 2.1052631578947366e-06, + "loss": 1.5341, + "step": 5 + }, + { + "epoch": 0.026595744680851064, + "grad_norm": 0.6391318440437317, + "learning_rate": 4.736842105263158e-06, + "loss": 1.5587, + "step": 10 + }, + { + "epoch": 0.0398936170212766, + "grad_norm": 0.5553399324417114, + "learning_rate": 7.3684210526315784e-06, + "loss": 1.4971, + "step": 15 + }, + { + "epoch": 0.05319148936170213, + "grad_norm": 0.5232102274894714, + "learning_rate": 9.999999999999999e-06, + "loss": 1.5264, + "step": 20 + }, + { + "epoch": 0.06648936170212766, + "grad_norm": 0.4755397439002991, + "learning_rate": 1.263157894736842e-05, + "loss": 1.4296, + "step": 25 + }, + { + "epoch": 0.0797872340425532, + "grad_norm": 0.481525719165802, + "learning_rate": 1.5263157894736842e-05, + "loss": 1.4337, + "step": 30 + }, + { + "epoch": 0.09308510638297872, + "grad_norm": 0.4500798285007477, + "learning_rate": 1.7894736842105264e-05, + "loss": 1.4398, + "step": 35 + }, + { + "epoch": 0.10638297872340426, + "grad_norm": 0.473066508769989, + "learning_rate": 2.0526315789473685e-05, + "loss": 1.3941, + "step": 40 + }, + { + "epoch": 0.1196808510638298, + "grad_norm": 0.43356993794441223, + "learning_rate": 2.3157894736842103e-05, + "loss": 1.3567, + "step": 45 + }, + { + "epoch": 0.13297872340425532, + "grad_norm": 0.4830070734024048, + "learning_rate": 2.578947368421053e-05, + "loss": 1.3647, + "step": 50 + }, + { + "epoch": 0.14627659574468085, + "grad_norm": 0.4371941089630127, + "learning_rate": 2.8421052631578946e-05, + "loss": 1.2915, + "step": 55 + }, + { + "epoch": 0.1595744680851064, + "grad_norm": 0.5103633999824524, + "learning_rate": 2.9999741868614275e-05, + "loss": 1.2941, + "step": 60 + }, + { + "epoch": 0.17287234042553193, + "grad_norm": 0.5449600219726562, + "learning_rate": 2.999683799255387e-05, + "loss": 1.2566, + "step": 65 + }, + { + "epoch": 0.18617021276595744, + "grad_norm": 0.5222111940383911, + "learning_rate": 2.9990708202925038e-05, + "loss": 1.293, + "step": 70 + }, + { + "epoch": 0.19946808510638298, + "grad_norm": 0.5745086669921875, + "learning_rate": 2.9981353818283835e-05, + "loss": 1.2247, + "step": 75 + }, + { + "epoch": 0.2127659574468085, + "grad_norm": 0.614306628704071, + "learning_rate": 2.996877685081685e-05, + "loss": 1.269, + "step": 80 + }, + { + "epoch": 0.22606382978723405, + "grad_norm": 0.6108382940292358, + "learning_rate": 2.995298000590839e-05, + "loss": 1.1904, + "step": 85 + }, + { + "epoch": 0.2393617021276596, + "grad_norm": 0.6126341819763184, + "learning_rate": 2.99339666815585e-05, + "loss": 1.148, + "step": 90 + }, + { + "epoch": 0.2526595744680851, + "grad_norm": 0.6374644637107849, + "learning_rate": 2.9911740967652065e-05, + "loss": 1.155, + "step": 95 + }, + { + "epoch": 0.26595744680851063, + "grad_norm": 0.743462860584259, + "learning_rate": 2.9886307645079037e-05, + "loss": 1.1389, + "step": 100 + }, + { + "epoch": 0.27925531914893614, + "grad_norm": 0.7299005389213562, + "learning_rate": 2.9857672184706038e-05, + "loss": 1.1054, + "step": 105 + }, + { + "epoch": 0.2925531914893617, + "grad_norm": 0.7631704807281494, + "learning_rate": 2.9825840746199534e-05, + "loss": 1.065, + "step": 110 + }, + { + "epoch": 0.3058510638297872, + "grad_norm": 0.7537283301353455, + "learning_rate": 2.9790820176700872e-05, + "loss": 1.0324, + "step": 115 + }, + { + "epoch": 0.3191489361702128, + "grad_norm": 0.7896299362182617, + "learning_rate": 2.975261800935339e-05, + "loss": 1.0123, + "step": 120 + }, + { + "epoch": 0.3324468085106383, + "grad_norm": 0.7505979537963867, + "learning_rate": 2.971124246168202e-05, + "loss": 1.0461, + "step": 125 + }, + { + "epoch": 0.34574468085106386, + "grad_norm": 0.8013362884521484, + "learning_rate": 2.9666702433825614e-05, + "loss": 0.9965, + "step": 130 + }, + { + "epoch": 0.35904255319148937, + "grad_norm": 1.000184178352356, + "learning_rate": 2.9619007506622506e-05, + "loss": 0.8923, + "step": 135 + }, + { + "epoch": 0.3723404255319149, + "grad_norm": 0.7650777101516724, + "learning_rate": 2.956816793954958e-05, + "loss": 0.9335, + "step": 140 + }, + { + "epoch": 0.38563829787234044, + "grad_norm": 0.8982766270637512, + "learning_rate": 2.951419466851542e-05, + "loss": 0.9795, + "step": 145 + }, + { + "epoch": 0.39893617021276595, + "grad_norm": 0.8234419822692871, + "learning_rate": 2.9457099303507904e-05, + "loss": 0.9122, + "step": 150 + }, + { + "epoch": 0.4122340425531915, + "grad_norm": 0.8702085614204407, + "learning_rate": 2.939689412609684e-05, + "loss": 0.8355, + "step": 155 + }, + { + "epoch": 0.425531914893617, + "grad_norm": 0.9740650057792664, + "learning_rate": 2.9333592086792113e-05, + "loss": 0.8251, + "step": 160 + }, + { + "epoch": 0.43882978723404253, + "grad_norm": 1.0087041854858398, + "learning_rate": 2.9267206802257952e-05, + "loss": 0.9189, + "step": 165 + }, + { + "epoch": 0.4521276595744681, + "grad_norm": 0.9558885097503662, + "learning_rate": 2.919775255238392e-05, + "loss": 0.785, + "step": 170 + }, + { + "epoch": 0.4654255319148936, + "grad_norm": 0.9477502107620239, + "learning_rate": 2.9125244277213176e-05, + "loss": 0.8847, + "step": 175 + }, + { + "epoch": 0.4787234042553192, + "grad_norm": 1.0151642560958862, + "learning_rate": 2.9049697573728818e-05, + "loss": 0.7849, + "step": 180 + }, + { + "epoch": 0.4920212765957447, + "grad_norm": 0.9238343238830566, + "learning_rate": 2.8971128692498872e-05, + "loss": 0.7599, + "step": 185 + }, + { + "epoch": 0.5053191489361702, + "grad_norm": 0.8920621871948242, + "learning_rate": 2.8889554534180664e-05, + "loss": 0.7694, + "step": 190 + }, + { + "epoch": 0.5186170212765957, + "grad_norm": 0.9160822629928589, + "learning_rate": 2.8804992645885415e-05, + "loss": 0.7782, + "step": 195 + }, + { + "epoch": 0.5319148936170213, + "grad_norm": 1.0318926572799683, + "learning_rate": 2.8717461217403726e-05, + "loss": 0.7118, + "step": 200 + }, + { + "epoch": 0.5452127659574468, + "grad_norm": 1.139615774154663, + "learning_rate": 2.8626979077292856e-05, + "loss": 0.6894, + "step": 205 + }, + { + "epoch": 0.5585106382978723, + "grad_norm": 1.038461685180664, + "learning_rate": 2.853356568882657e-05, + "loss": 0.6714, + "step": 210 + }, + { + "epoch": 0.5718085106382979, + "grad_norm": 0.999913215637207, + "learning_rate": 2.843724114580848e-05, + "loss": 0.7014, + "step": 215 + }, + { + "epoch": 0.5851063829787234, + "grad_norm": 1.08591890335083, + "learning_rate": 2.833802616824972e-05, + "loss": 0.7678, + "step": 220 + }, + { + "epoch": 0.598404255319149, + "grad_norm": 1.0443202257156372, + "learning_rate": 2.8235942097911964e-05, + "loss": 0.701, + "step": 225 + }, + { + "epoch": 0.6117021276595744, + "grad_norm": 1.05222487449646, + "learning_rate": 2.8131010893716676e-05, + "loss": 0.6982, + "step": 230 + }, + { + "epoch": 0.625, + "grad_norm": 1.0143768787384033, + "learning_rate": 2.8023255127021593e-05, + "loss": 0.665, + "step": 235 + }, + { + "epoch": 0.6382978723404256, + "grad_norm": 1.0820704698562622, + "learning_rate": 2.7912697976765516e-05, + "loss": 0.6561, + "step": 240 + }, + { + "epoch": 0.651595744680851, + "grad_norm": 1.1055158376693726, + "learning_rate": 2.7799363224482334e-05, + "loss": 0.632, + "step": 245 + }, + { + "epoch": 0.6648936170212766, + "grad_norm": 1.337937593460083, + "learning_rate": 2.7683275249185507e-05, + "loss": 0.5676, + "step": 250 + }, + { + "epoch": 0.6781914893617021, + "grad_norm": 1.1012290716171265, + "learning_rate": 2.7564459022123953e-05, + "loss": 0.6231, + "step": 255 + }, + { + "epoch": 0.6914893617021277, + "grad_norm": 1.0958342552185059, + "learning_rate": 2.744294010141061e-05, + "loss": 0.5963, + "step": 260 + }, + { + "epoch": 0.7047872340425532, + "grad_norm": 1.0873651504516602, + "learning_rate": 2.7318744626524704e-05, + "loss": 0.6029, + "step": 265 + }, + { + "epoch": 0.7180851063829787, + "grad_norm": 1.0857548713684082, + "learning_rate": 2.719189931268899e-05, + "loss": 0.5968, + "step": 270 + }, + { + "epoch": 0.7313829787234043, + "grad_norm": 1.068359375, + "learning_rate": 2.7062431445123127e-05, + "loss": 0.6041, + "step": 275 + }, + { + "epoch": 0.7446808510638298, + "grad_norm": 1.1181713342666626, + "learning_rate": 2.6930368873174493e-05, + "loss": 0.5676, + "step": 280 + }, + { + "epoch": 0.7579787234042553, + "grad_norm": 1.3151569366455078, + "learning_rate": 2.6795740004327584e-05, + "loss": 0.6192, + "step": 285 + }, + { + "epoch": 0.7712765957446809, + "grad_norm": 1.510077714920044, + "learning_rate": 2.665857379809338e-05, + "loss": 0.6056, + "step": 290 + }, + { + "epoch": 0.7845744680851063, + "grad_norm": 1.226473331451416, + "learning_rate": 2.6518899759780017e-05, + "loss": 0.5426, + "step": 295 + }, + { + "epoch": 0.7978723404255319, + "grad_norm": 1.2364583015441895, + "learning_rate": 2.637674793414596e-05, + "loss": 0.5725, + "step": 300 + }, + { + "epoch": 0.8111702127659575, + "grad_norm": 1.0637487173080444, + "learning_rate": 2.6232148898937223e-05, + "loss": 0.5313, + "step": 305 + }, + { + "epoch": 0.824468085106383, + "grad_norm": 1.1694235801696777, + "learning_rate": 2.6085133758309887e-05, + "loss": 0.5098, + "step": 310 + }, + { + "epoch": 0.8377659574468085, + "grad_norm": 1.1489259004592896, + "learning_rate": 2.5935734136139407e-05, + "loss": 0.5, + "step": 315 + }, + { + "epoch": 0.851063829787234, + "grad_norm": 1.0986201763153076, + "learning_rate": 2.5783982169218125e-05, + "loss": 0.5293, + "step": 320 + }, + { + "epoch": 0.8643617021276596, + "grad_norm": 1.2561513185501099, + "learning_rate": 2.5629910500342424e-05, + "loss": 0.4843, + "step": 325 + }, + { + "epoch": 0.8776595744680851, + "grad_norm": 1.5020285844802856, + "learning_rate": 2.5473552271291092e-05, + "loss": 0.4686, + "step": 330 + }, + { + "epoch": 0.8909574468085106, + "grad_norm": 1.2500102519989014, + "learning_rate": 2.531494111569629e-05, + "loss": 0.448, + "step": 335 + }, + { + "epoch": 0.9042553191489362, + "grad_norm": 1.2347040176391602, + "learning_rate": 2.5154111151808752e-05, + "loss": 0.4862, + "step": 340 + }, + { + "epoch": 0.9175531914893617, + "grad_norm": 1.1309820413589478, + "learning_rate": 2.4991096975158757e-05, + "loss": 0.4587, + "step": 345 + }, + { + "epoch": 0.9308510638297872, + "grad_norm": 1.082922339439392, + "learning_rate": 2.4825933651114375e-05, + "loss": 0.5029, + "step": 350 + }, + { + "epoch": 0.9441489361702128, + "grad_norm": 1.2537115812301636, + "learning_rate": 2.4658656707338733e-05, + "loss": 0.489, + "step": 355 + }, + { + "epoch": 0.9574468085106383, + "grad_norm": 1.2744743824005127, + "learning_rate": 2.4489302126147768e-05, + "loss": 0.4566, + "step": 360 + }, + { + "epoch": 0.9707446808510638, + "grad_norm": 1.1172293424606323, + "learning_rate": 2.431790633677019e-05, + "loss": 0.4543, + "step": 365 + }, + { + "epoch": 0.9840425531914894, + "grad_norm": 1.0972198247909546, + "learning_rate": 2.414450620751136e-05, + "loss": 0.4443, + "step": 370 + }, + { + "epoch": 0.9973404255319149, + "grad_norm": 1.4971073865890503, + "learning_rate": 2.396913903782268e-05, + "loss": 0.4701, + "step": 375 + }, + { + "epoch": 1.0106382978723405, + "grad_norm": 1.1080225706100464, + "learning_rate": 2.379184255027822e-05, + "loss": 0.3726, + "step": 380 + }, + { + "epoch": 1.023936170212766, + "grad_norm": 1.141753911972046, + "learning_rate": 2.361265488246039e-05, + "loss": 0.3552, + "step": 385 + }, + { + "epoch": 1.0372340425531914, + "grad_norm": 1.1549185514450073, + "learning_rate": 2.3431614578756304e-05, + "loss": 0.3919, + "step": 390 + }, + { + "epoch": 1.050531914893617, + "grad_norm": 1.228663444519043, + "learning_rate": 2.3248760582066605e-05, + "loss": 0.3459, + "step": 395 + }, + { + "epoch": 1.0638297872340425, + "grad_norm": 1.1136003732681274, + "learning_rate": 2.306413222542866e-05, + "loss": 0.3917, + "step": 400 + }, + { + "epoch": 1.077127659574468, + "grad_norm": 1.0495463609695435, + "learning_rate": 2.287776922355573e-05, + "loss": 0.3493, + "step": 405 + }, + { + "epoch": 1.0904255319148937, + "grad_norm": 1.1702309846878052, + "learning_rate": 2.268971166429412e-05, + "loss": 0.3402, + "step": 410 + }, + { + "epoch": 1.1037234042553192, + "grad_norm": 1.389768362045288, + "learning_rate": 2.25e-05, + "loss": 0.3179, + "step": 415 + }, + { + "epoch": 1.1170212765957448, + "grad_norm": 1.1458165645599365, + "learning_rate": 2.2308675038837887e-05, + "loss": 0.3551, + "step": 420 + }, + { + "epoch": 1.1303191489361701, + "grad_norm": 1.2046173810958862, + "learning_rate": 2.2115777936002533e-05, + "loss": 0.3412, + "step": 425 + }, + { + "epoch": 1.1436170212765957, + "grad_norm": 1.213117003440857, + "learning_rate": 2.192135018486618e-05, + "loss": 0.3246, + "step": 430 + }, + { + "epoch": 1.1569148936170213, + "grad_norm": 1.1923022270202637, + "learning_rate": 2.172543360805308e-05, + "loss": 0.3664, + "step": 435 + }, + { + "epoch": 1.1702127659574468, + "grad_norm": 1.1729960441589355, + "learning_rate": 2.152807034844322e-05, + "loss": 0.3169, + "step": 440 + }, + { + "epoch": 1.1835106382978724, + "grad_norm": 1.2618674039840698, + "learning_rate": 2.1329302860107065e-05, + "loss": 0.3687, + "step": 445 + }, + { + "epoch": 1.196808510638298, + "grad_norm": 1.116434931755066, + "learning_rate": 2.1129173899173474e-05, + "loss": 0.2886, + "step": 450 + }, + { + "epoch": 1.2101063829787235, + "grad_norm": 1.2099875211715698, + "learning_rate": 2.0927726514632557e-05, + "loss": 0.2937, + "step": 455 + }, + { + "epoch": 1.2234042553191489, + "grad_norm": 1.1562926769256592, + "learning_rate": 2.072500403907559e-05, + "loss": 0.2752, + "step": 460 + }, + { + "epoch": 1.2367021276595744, + "grad_norm": 1.2177269458770752, + "learning_rate": 2.0521050079373895e-05, + "loss": 0.2555, + "step": 465 + }, + { + "epoch": 1.25, + "grad_norm": 1.095200538635254, + "learning_rate": 2.0315908507298713e-05, + "loss": 0.2908, + "step": 470 + }, + { + "epoch": 1.2632978723404256, + "grad_norm": 1.1007760763168335, + "learning_rate": 2.0109623450084154e-05, + "loss": 0.2899, + "step": 475 + }, + { + "epoch": 1.2765957446808511, + "grad_norm": 1.082155704498291, + "learning_rate": 1.990223928093511e-05, + "loss": 0.2954, + "step": 480 + }, + { + "epoch": 1.2898936170212765, + "grad_norm": 1.3381167650222778, + "learning_rate": 1.9693800609482318e-05, + "loss": 0.2762, + "step": 485 + }, + { + "epoch": 1.3031914893617023, + "grad_norm": 1.2754381895065308, + "learning_rate": 1.9484352272186555e-05, + "loss": 0.3044, + "step": 490 + }, + { + "epoch": 1.3164893617021276, + "grad_norm": 1.1535813808441162, + "learning_rate": 1.9273939322694035e-05, + "loss": 0.324, + "step": 495 + }, + { + "epoch": 1.3297872340425532, + "grad_norm": 1.172067642211914, + "learning_rate": 1.906260702214508e-05, + "loss": 0.2723, + "step": 500 + }, + { + "epoch": 1.3430851063829787, + "grad_norm": 1.2536730766296387, + "learning_rate": 1.8850400829438157e-05, + "loss": 0.2719, + "step": 505 + }, + { + "epoch": 1.3563829787234043, + "grad_norm": 1.2371692657470703, + "learning_rate": 1.8637366391451414e-05, + "loss": 0.295, + "step": 510 + }, + { + "epoch": 1.3696808510638299, + "grad_norm": 1.2225559949874878, + "learning_rate": 1.842354953322373e-05, + "loss": 0.28, + "step": 515 + }, + { + "epoch": 1.3829787234042552, + "grad_norm": 1.0607322454452515, + "learning_rate": 1.8208996248097462e-05, + "loss": 0.2532, + "step": 520 + }, + { + "epoch": 1.3962765957446808, + "grad_norm": 1.3381015062332153, + "learning_rate": 1.7993752687825003e-05, + "loss": 0.2326, + "step": 525 + }, + { + "epoch": 1.4095744680851063, + "grad_norm": 1.1251689195632935, + "learning_rate": 1.777786515264123e-05, + "loss": 0.2694, + "step": 530 + }, + { + "epoch": 1.422872340425532, + "grad_norm": 1.0721025466918945, + "learning_rate": 1.7561380081304063e-05, + "loss": 0.2715, + "step": 535 + }, + { + "epoch": 1.4361702127659575, + "grad_norm": 1.4788038730621338, + "learning_rate": 1.7344344041105177e-05, + "loss": 0.2531, + "step": 540 + }, + { + "epoch": 1.449468085106383, + "grad_norm": 1.187760829925537, + "learning_rate": 1.7126803717853086e-05, + "loss": 0.2641, + "step": 545 + }, + { + "epoch": 1.4627659574468086, + "grad_norm": 1.158298373222351, + "learning_rate": 1.6908805905830752e-05, + "loss": 0.2256, + "step": 550 + }, + { + "epoch": 1.476063829787234, + "grad_norm": 1.3275681734085083, + "learning_rate": 1.6690397497729818e-05, + "loss": 0.2876, + "step": 555 + }, + { + "epoch": 1.4893617021276595, + "grad_norm": 1.4265680313110352, + "learning_rate": 1.647162547456372e-05, + "loss": 0.2465, + "step": 560 + }, + { + "epoch": 1.502659574468085, + "grad_norm": 1.1086647510528564, + "learning_rate": 1.6252536895561754e-05, + "loss": 0.26, + "step": 565 + }, + { + "epoch": 1.5159574468085106, + "grad_norm": 1.2507472038269043, + "learning_rate": 1.6033178888046368e-05, + "loss": 0.2651, + "step": 570 + }, + { + "epoch": 1.5292553191489362, + "grad_norm": 1.1193428039550781, + "learning_rate": 1.5813598637295767e-05, + "loss": 0.2322, + "step": 575 + }, + { + "epoch": 1.5425531914893615, + "grad_norm": 1.048937201499939, + "learning_rate": 1.5593843376394043e-05, + "loss": 0.2025, + "step": 580 + }, + { + "epoch": 1.5558510638297873, + "grad_norm": 1.3625367879867554, + "learning_rate": 1.5373960376071095e-05, + "loss": 0.2208, + "step": 585 + }, + { + "epoch": 1.5691489361702127, + "grad_norm": 1.152076005935669, + "learning_rate": 1.515399693453435e-05, + "loss": 0.192, + "step": 590 + }, + { + "epoch": 1.5824468085106385, + "grad_norm": 1.1551628112792969, + "learning_rate": 1.493400036729465e-05, + "loss": 0.2228, + "step": 595 + }, + { + "epoch": 1.5957446808510638, + "grad_norm": 1.138343095779419, + "learning_rate": 1.4714017996988384e-05, + "loss": 0.2221, + "step": 600 + }, + { + "epoch": 1.6090425531914894, + "grad_norm": 1.3027465343475342, + "learning_rate": 1.4494097143198083e-05, + "loss": 0.2013, + "step": 605 + }, + { + "epoch": 1.622340425531915, + "grad_norm": 1.0445104837417603, + "learning_rate": 1.4274285112273701e-05, + "loss": 0.196, + "step": 610 + }, + { + "epoch": 1.6356382978723403, + "grad_norm": 1.0990639925003052, + "learning_rate": 1.4054629187156702e-05, + "loss": 0.2039, + "step": 615 + }, + { + "epoch": 1.648936170212766, + "grad_norm": 1.053717851638794, + "learning_rate": 1.3835176617209241e-05, + "loss": 0.208, + "step": 620 + }, + { + "epoch": 1.6622340425531914, + "grad_norm": 0.9232786893844604, + "learning_rate": 1.3615974608050472e-05, + "loss": 0.197, + "step": 625 + }, + { + "epoch": 1.675531914893617, + "grad_norm": 1.001939296722412, + "learning_rate": 1.3397070311402377e-05, + "loss": 0.1829, + "step": 630 + }, + { + "epoch": 1.6888297872340425, + "grad_norm": 1.3330159187316895, + "learning_rate": 1.3178510814947112e-05, + "loss": 0.2057, + "step": 635 + }, + { + "epoch": 1.702127659574468, + "grad_norm": 1.1185407638549805, + "learning_rate": 1.296034313219816e-05, + "loss": 0.1765, + "step": 640 + }, + { + "epoch": 1.7154255319148937, + "grad_norm": 1.1284985542297363, + "learning_rate": 1.2742614192387417e-05, + "loss": 0.1757, + "step": 645 + }, + { + "epoch": 1.728723404255319, + "grad_norm": 1.1091201305389404, + "learning_rate": 1.2525370830370447e-05, + "loss": 0.1882, + "step": 650 + }, + { + "epoch": 1.7420212765957448, + "grad_norm": 1.1470004320144653, + "learning_rate": 1.2308659776551985e-05, + "loss": 0.1728, + "step": 655 + }, + { + "epoch": 1.7553191489361701, + "grad_norm": 1.289763331413269, + "learning_rate": 1.209252764683395e-05, + "loss": 0.1843, + "step": 660 + }, + { + "epoch": 1.7686170212765957, + "grad_norm": 1.1346173286437988, + "learning_rate": 1.1877020932588067e-05, + "loss": 0.1689, + "step": 665 + }, + { + "epoch": 1.7819148936170213, + "grad_norm": 1.0531857013702393, + "learning_rate": 1.1662185990655285e-05, + "loss": 0.1472, + "step": 670 + }, + { + "epoch": 1.7952127659574468, + "grad_norm": 1.3603458404541016, + "learning_rate": 1.1448069033374135e-05, + "loss": 0.1841, + "step": 675 + }, + { + "epoch": 1.8085106382978724, + "grad_norm": 1.1423213481903076, + "learning_rate": 1.1234716118640149e-05, + "loss": 0.1745, + "step": 680 + }, + { + "epoch": 1.8218085106382977, + "grad_norm": 1.4216969013214111, + "learning_rate": 1.1022173139998556e-05, + "loss": 0.1692, + "step": 685 + }, + { + "epoch": 1.8351063829787235, + "grad_norm": 1.2006597518920898, + "learning_rate": 1.0810485816772251e-05, + "loss": 0.1724, + "step": 690 + }, + { + "epoch": 1.8484042553191489, + "grad_norm": 1.1873189210891724, + "learning_rate": 1.0599699684227313e-05, + "loss": 0.1745, + "step": 695 + }, + { + "epoch": 1.8617021276595744, + "grad_norm": 1.0509731769561768, + "learning_rate": 1.0389860083778056e-05, + "loss": 0.1737, + "step": 700 + }, + { + "epoch": 1.875, + "grad_norm": 0.9806941747665405, + "learning_rate": 1.0181012153233851e-05, + "loss": 0.1483, + "step": 705 + }, + { + "epoch": 1.8882978723404256, + "grad_norm": 1.083099365234375, + "learning_rate": 9.973200817089655e-06, + "loss": 0.1583, + "step": 710 + }, + { + "epoch": 1.9015957446808511, + "grad_norm": 1.2485673427581787, + "learning_rate": 9.7664707768625e-06, + "loss": 0.1506, + "step": 715 + }, + { + "epoch": 1.9148936170212765, + "grad_norm": 0.9818064570426941, + "learning_rate": 9.560866501475913e-06, + "loss": 0.1463, + "step": 720 + }, + { + "epoch": 1.9281914893617023, + "grad_norm": 1.1130419969558716, + "learning_rate": 9.35643221769436e-06, + "loss": 0.1467, + "step": 725 + }, + { + "epoch": 1.9414893617021276, + "grad_norm": 1.0716183185577393, + "learning_rate": 9.15321190060981e-06, + "loss": 0.14, + "step": 730 + }, + { + "epoch": 1.9547872340425532, + "grad_norm": 1.2828465700149536, + "learning_rate": 8.951249264182403e-06, + "loss": 0.1698, + "step": 735 + }, + { + "epoch": 1.9680851063829787, + "grad_norm": 1.0811541080474854, + "learning_rate": 8.750587751837313e-06, + "loss": 0.161, + "step": 740 + }, + { + "epoch": 1.9813829787234043, + "grad_norm": 1.2441790103912354, + "learning_rate": 8.551270527119784e-06, + "loss": 0.1456, + "step": 745 + }, + { + "epoch": 1.9946808510638299, + "grad_norm": 0.9267172813415527, + "learning_rate": 8.35334046441041e-06, + "loss": 0.1505, + "step": 750 + }, + { + "epoch": 2.007978723404255, + "grad_norm": 1.1950887441635132, + "learning_rate": 8.156840139702554e-06, + "loss": 0.1254, + "step": 755 + }, + { + "epoch": 2.021276595744681, + "grad_norm": 0.9787840247154236, + "learning_rate": 7.961811821444008e-06, + "loss": 0.1123, + "step": 760 + }, + { + "epoch": 2.0345744680851063, + "grad_norm": 0.9884850382804871, + "learning_rate": 7.768297461444766e-06, + "loss": 0.1244, + "step": 765 + }, + { + "epoch": 2.047872340425532, + "grad_norm": 1.0728877782821655, + "learning_rate": 7.576338685852955e-06, + "loss": 0.1218, + "step": 770 + }, + { + "epoch": 2.0611702127659575, + "grad_norm": 1.1110516786575317, + "learning_rate": 7.385976786200765e-06, + "loss": 0.1061, + "step": 775 + }, + { + "epoch": 2.074468085106383, + "grad_norm": 0.9580075740814209, + "learning_rate": 7.197252710522395e-06, + "loss": 0.1248, + "step": 780 + }, + { + "epoch": 2.0877659574468086, + "grad_norm": 0.849315881729126, + "learning_rate": 7.010207054545873e-06, + "loss": 0.1053, + "step": 785 + }, + { + "epoch": 2.101063829787234, + "grad_norm": 0.9483814835548401, + "learning_rate": 6.8248800529606604e-06, + "loss": 0.1213, + "step": 790 + }, + { + "epoch": 2.1143617021276597, + "grad_norm": 0.9562873244285583, + "learning_rate": 6.641311570762918e-06, + "loss": 0.1086, + "step": 795 + }, + { + "epoch": 2.127659574468085, + "grad_norm": 0.978416919708252, + "learning_rate": 6.4595410946803e-06, + "loss": 0.1173, + "step": 800 + }, + { + "epoch": 2.1409574468085104, + "grad_norm": 0.96921306848526, + "learning_rate": 6.2796077246781046e-06, + "loss": 0.0993, + "step": 805 + }, + { + "epoch": 2.154255319148936, + "grad_norm": 1.001747965812683, + "learning_rate": 6.1015501655486365e-06, + "loss": 0.1087, + "step": 810 + }, + { + "epoch": 2.1675531914893615, + "grad_norm": 1.008859395980835, + "learning_rate": 5.925406718585552e-06, + "loss": 0.1152, + "step": 815 + }, + { + "epoch": 2.1808510638297873, + "grad_norm": 0.8488165736198425, + "learning_rate": 5.751215273345036e-06, + "loss": 0.1054, + "step": 820 + }, + { + "epoch": 2.1941489361702127, + "grad_norm": 1.074955940246582, + "learning_rate": 5.5790132994954935e-06, + "loss": 0.1227, + "step": 825 + }, + { + "epoch": 2.2074468085106385, + "grad_norm": 0.8532001376152039, + "learning_rate": 5.408837838757588e-06, + "loss": 0.107, + "step": 830 + }, + { + "epoch": 2.220744680851064, + "grad_norm": 0.9291162490844727, + "learning_rate": 5.240725496936373e-06, + "loss": 0.0983, + "step": 835 + }, + { + "epoch": 2.2340425531914896, + "grad_norm": 0.8940398097038269, + "learning_rate": 5.0747124360471125e-06, + "loss": 0.0878, + "step": 840 + }, + { + "epoch": 2.247340425531915, + "grad_norm": 1.070345401763916, + "learning_rate": 4.910834366536631e-06, + "loss": 0.0958, + "step": 845 + }, + { + "epoch": 2.2606382978723403, + "grad_norm": 0.8505008220672607, + "learning_rate": 4.74912653960177e-06, + "loss": 0.0876, + "step": 850 + }, + { + "epoch": 2.273936170212766, + "grad_norm": 0.8866683840751648, + "learning_rate": 4.589623739606625e-06, + "loss": 0.101, + "step": 855 + }, + { + "epoch": 2.2872340425531914, + "grad_norm": 0.9380369782447815, + "learning_rate": 4.4323602766002165e-06, + "loss": 0.1006, + "step": 860 + }, + { + "epoch": 2.300531914893617, + "grad_norm": 0.7981929183006287, + "learning_rate": 4.277369978936188e-06, + "loss": 0.0971, + "step": 865 + }, + { + "epoch": 2.3138297872340425, + "grad_norm": 1.0620580911636353, + "learning_rate": 4.1246861859961114e-06, + "loss": 0.1044, + "step": 870 + }, + { + "epoch": 2.327127659574468, + "grad_norm": 0.8291345834732056, + "learning_rate": 3.974341741017978e-06, + "loss": 0.0789, + "step": 875 + }, + { + "epoch": 2.3404255319148937, + "grad_norm": 0.6417453289031982, + "learning_rate": 3.826368984031414e-06, + "loss": 0.101, + "step": 880 + }, + { + "epoch": 2.353723404255319, + "grad_norm": 0.8143091797828674, + "learning_rate": 3.6807997449011426e-06, + "loss": 0.1129, + "step": 885 + }, + { + "epoch": 2.367021276595745, + "grad_norm": 0.8944543600082397, + "learning_rate": 3.5376653364801703e-06, + "loss": 0.0897, + "step": 890 + }, + { + "epoch": 2.38031914893617, + "grad_norm": 0.7766957879066467, + "learning_rate": 3.3969965478742038e-06, + "loss": 0.0909, + "step": 895 + }, + { + "epoch": 2.393617021276596, + "grad_norm": 0.9118006229400635, + "learning_rate": 3.258823637818722e-06, + "loss": 0.0888, + "step": 900 + }, + { + "epoch": 2.4069148936170213, + "grad_norm": 0.9052854180335999, + "learning_rate": 3.123176328170131e-06, + "loss": 0.1101, + "step": 905 + }, + { + "epoch": 2.420212765957447, + "grad_norm": 0.8282054662704468, + "learning_rate": 2.990083797512401e-06, + "loss": 0.0977, + "step": 910 + }, + { + "epoch": 2.4335106382978724, + "grad_norm": 0.9523313641548157, + "learning_rate": 2.8595746748805805e-06, + "loss": 0.1031, + "step": 915 + }, + { + "epoch": 2.4468085106382977, + "grad_norm": 0.8263522982597351, + "learning_rate": 2.7316770336025166e-06, + "loss": 0.0886, + "step": 920 + }, + { + "epoch": 2.4601063829787235, + "grad_norm": 0.6050865054130554, + "learning_rate": 2.60641838526008e-06, + "loss": 0.09, + "step": 925 + }, + { + "epoch": 2.473404255319149, + "grad_norm": 1.259838581085205, + "learning_rate": 2.483825673771279e-06, + "loss": 0.0938, + "step": 930 + }, + { + "epoch": 2.4867021276595747, + "grad_norm": 0.8593924641609192, + "learning_rate": 2.363925269594449e-06, + "loss": 0.0891, + "step": 935 + }, + { + "epoch": 2.5, + "grad_norm": 0.8433693051338196, + "learning_rate": 2.2467429640557903e-06, + "loss": 0.1017, + "step": 940 + }, + { + "epoch": 2.5132978723404253, + "grad_norm": 0.7522491812705994, + "learning_rate": 2.1323039638015024e-06, + "loss": 0.0951, + "step": 945 + }, + { + "epoch": 2.526595744680851, + "grad_norm": 0.7738783955574036, + "learning_rate": 2.020632885375684e-06, + "loss": 0.091, + "step": 950 + }, + { + "epoch": 2.5398936170212765, + "grad_norm": 0.9026947617530823, + "learning_rate": 1.9117537499251416e-06, + "loss": 0.0824, + "step": 955 + }, + { + "epoch": 2.5531914893617023, + "grad_norm": 0.7013728618621826, + "learning_rate": 1.8056899780323016e-06, + "loss": 0.0824, + "step": 960 + }, + { + "epoch": 2.5664893617021276, + "grad_norm": 0.8128451108932495, + "learning_rate": 1.7024643846772981e-06, + "loss": 0.0853, + "step": 965 + }, + { + "epoch": 2.579787234042553, + "grad_norm": 0.7896420359611511, + "learning_rate": 1.6020991743303264e-06, + "loss": 0.0879, + "step": 970 + }, + { + "epoch": 2.5930851063829787, + "grad_norm": 0.6438320279121399, + "learning_rate": 1.5046159361753226e-06, + "loss": 0.0817, + "step": 975 + }, + { + "epoch": 2.6063829787234045, + "grad_norm": 0.5906373858451843, + "learning_rate": 1.4100356394659863e-06, + "loss": 0.0789, + "step": 980 + }, + { + "epoch": 2.61968085106383, + "grad_norm": 0.8968678116798401, + "learning_rate": 1.318378629015184e-06, + "loss": 0.0786, + "step": 985 + }, + { + "epoch": 2.632978723404255, + "grad_norm": 0.6626492142677307, + "learning_rate": 1.229664620818633e-06, + "loss": 0.0882, + "step": 990 + }, + { + "epoch": 2.646276595744681, + "grad_norm": 0.6762717962265015, + "learning_rate": 1.1439126978138769e-06, + "loss": 0.0855, + "step": 995 + }, + { + "epoch": 2.6595744680851063, + "grad_norm": 0.6753803491592407, + "learning_rate": 1.0611413057754221e-06, + "loss": 0.08, + "step": 1000 + }, + { + "epoch": 2.672872340425532, + "grad_norm": 0.596386730670929, + "learning_rate": 9.813682493469396e-07, + "loss": 0.0857, + "step": 1005 + }, + { + "epoch": 2.6861702127659575, + "grad_norm": 0.6813955307006836, + "learning_rate": 9.046106882113753e-07, + "loss": 0.0803, + "step": 1010 + }, + { + "epoch": 2.699468085106383, + "grad_norm": 0.8387485146522522, + "learning_rate": 8.308851333997918e-07, + "loss": 0.077, + "step": 1015 + }, + { + "epoch": 2.7127659574468086, + "grad_norm": 0.7364112734794617, + "learning_rate": 7.602074437397455e-07, + "loss": 0.0811, + "step": 1020 + }, + { + "epoch": 2.726063829787234, + "grad_norm": 0.6899734735488892, + "learning_rate": 6.925928224439532e-07, + "loss": 0.0683, + "step": 1025 + }, + { + "epoch": 2.7393617021276597, + "grad_norm": 0.725025475025177, + "learning_rate": 6.280558138399805e-07, + "loss": 0.0831, + "step": 1030 + }, + { + "epoch": 2.752659574468085, + "grad_norm": 0.7464246153831482, + "learning_rate": 5.666103002416762e-07, + "loss": 0.0786, + "step": 1035 + }, + { + "epoch": 2.7659574468085104, + "grad_norm": 0.8465781211853027, + "learning_rate": 5.082694989629916e-07, + "loss": 0.0695, + "step": 1040 + }, + { + "epoch": 2.779255319148936, + "grad_norm": 0.7774459719657898, + "learning_rate": 4.5304595947485927e-07, + "loss": 0.0835, + "step": 1045 + }, + { + "epoch": 2.7925531914893615, + "grad_norm": 0.8020236492156982, + "learning_rate": 4.0095156070571513e-07, + "loss": 0.0707, + "step": 1050 + }, + { + "epoch": 2.8058510638297873, + "grad_norm": 0.6417722702026367, + "learning_rate": 3.5199750848627753e-07, + "loss": 0.0833, + "step": 1055 + }, + { + "epoch": 2.8191489361702127, + "grad_norm": 0.688158392906189, + "learning_rate": 3.0619433313909706e-07, + "loss": 0.0997, + "step": 1060 + }, + { + "epoch": 2.8324468085106385, + "grad_norm": 0.7241319417953491, + "learning_rate": 2.635518872134185e-07, + "loss": 0.0908, + "step": 1065 + }, + { + "epoch": 2.845744680851064, + "grad_norm": 0.6616683006286621, + "learning_rate": 2.2407934336583446e-07, + "loss": 0.0781, + "step": 1070 + }, + { + "epoch": 2.8590425531914896, + "grad_norm": 0.7756491899490356, + "learning_rate": 1.8778519238719204e-07, + "loss": 0.0917, + "step": 1075 + }, + { + "epoch": 2.872340425531915, + "grad_norm": 0.7911010980606079, + "learning_rate": 1.5467724137617046e-07, + "loss": 0.0894, + "step": 1080 + }, + { + "epoch": 2.8856382978723403, + "grad_norm": 0.8057021498680115, + "learning_rate": 1.2476261205992934e-07, + "loss": 0.0732, + "step": 1085 + }, + { + "epoch": 2.898936170212766, + "grad_norm": 0.6991729140281677, + "learning_rate": 9.804773926217092e-08, + "loss": 0.0776, + "step": 1090 + }, + { + "epoch": 2.9122340425531914, + "grad_norm": 0.6029214859008789, + "learning_rate": 7.453836951897885e-08, + "loss": 0.0688, + "step": 1095 + }, + { + "epoch": 2.925531914893617, + "grad_norm": 0.5898141264915466, + "learning_rate": 5.4239559842695354e-08, + "loss": 0.0752, + "step": 1100 + }, + { + "epoch": 2.9388297872340425, + "grad_norm": 0.6291354298591614, + "learning_rate": 3.715567663412966e-08, + "loss": 0.0841, + "step": 1105 + }, + { + "epoch": 2.952127659574468, + "grad_norm": 0.8570540547370911, + "learning_rate": 2.3290394743317732e-08, + "loss": 0.0862, + "step": 1110 + }, + { + "epoch": 2.9654255319148937, + "grad_norm": 0.6967211365699768, + "learning_rate": 1.2646696679042835e-08, + "loss": 0.0691, + "step": 1115 + }, + { + "epoch": 2.978723404255319, + "grad_norm": 0.6774645447731018, + "learning_rate": 5.2268719672671215e-09, + "loss": 0.0771, + "step": 1120 + }, + { + "epoch": 2.992021276595745, + "grad_norm": 0.662575364112854, + "learning_rate": 1.0325166586572233e-09, + "loss": 0.0859, + "step": 1125 + }, + { + "epoch": 3.0, + "step": 1128, + "total_flos": 1.4339270698462085e+18, + "train_loss": 0.3988168353994265, + "train_runtime": 612.9862, + "train_samples_per_second": 58.886, + "train_steps_per_second": 1.84 + } + ], + "logging_steps": 5, + "max_steps": 1128, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4339270698462085e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb3d100e69d0ae6df0442e188742a7287355a3e4 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/19_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4a81a5784f26a173b5fce6ecb7ebd87c2da8d873dfb956e68fb463b2113606 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..70bd6c0724fa419758c1c792678cb0bd6db3ae75 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 1_128_e3_3e-5 + results: [] +--- + + + +# 1_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..90db5ed614f3fe4c0bd4c17c8f8c55e06a9046b2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "down_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6023580ec97799899a8822a0487822fd61594bc0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7be9cedd59c107648f20f2dd574a2c67d78bb9dab7803e279730d5fe4ca9aec +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a6063f3bf706e24f777dda8c64dd536858af2baf --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2938989521139139e+18, + "train_loss": 0.44608225578962896, + "train_runtime": 570.0249, + "train_samples": 11101, + "train_samples_per_second": 58.424, + "train_steps_per_second": 1.826 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a6063f3bf706e24f777dda8c64dd536858af2baf --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2938989521139139e+18, + "train_loss": 0.44608225578962896, + "train_runtime": 570.0249, + "train_samples": 11101, + "train_samples_per_second": 58.424, + "train_steps_per_second": 1.826 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2892c081bb2f4f89fc306e9649b55b9d9cac7a5c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1499 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1041, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01440922190201729, + "grad_norm": 0.7492797374725342, + "learning_rate": 2.2641509433962262e-06, + "loss": 1.6331, + "step": 5 + }, + { + "epoch": 0.02881844380403458, + "grad_norm": 0.6836415529251099, + "learning_rate": 5.094339622641509e-06, + "loss": 1.5938, + "step": 10 + }, + { + "epoch": 0.043227665706051875, + "grad_norm": 0.5749224424362183, + "learning_rate": 7.924528301886793e-06, + "loss": 1.6621, + "step": 15 + }, + { + "epoch": 0.05763688760806916, + "grad_norm": 0.5081445574760437, + "learning_rate": 1.0754716981132076e-05, + "loss": 1.5405, + "step": 20 + }, + { + "epoch": 0.07204610951008646, + "grad_norm": 0.48484840989112854, + "learning_rate": 1.358490566037736e-05, + "loss": 1.5868, + "step": 25 + }, + { + "epoch": 0.08645533141210375, + "grad_norm": 0.512398898601532, + "learning_rate": 1.6415094339622643e-05, + "loss": 1.5633, + "step": 30 + }, + { + "epoch": 0.10086455331412104, + "grad_norm": 0.4588213562965393, + "learning_rate": 1.9245283018867924e-05, + "loss": 1.5029, + "step": 35 + }, + { + "epoch": 0.11527377521613832, + "grad_norm": 0.45142418146133423, + "learning_rate": 2.2075471698113208e-05, + "loss": 1.4939, + "step": 40 + }, + { + "epoch": 0.12968299711815562, + "grad_norm": 0.4935033321380615, + "learning_rate": 2.4905660377358492e-05, + "loss": 1.4847, + "step": 45 + }, + { + "epoch": 0.1440922190201729, + "grad_norm": 0.5325248837471008, + "learning_rate": 2.7735849056603773e-05, + "loss": 1.4731, + "step": 50 + }, + { + "epoch": 0.1585014409221902, + "grad_norm": 0.79184490442276, + "learning_rate": 2.9999924169005146e-05, + "loss": 1.4233, + "step": 55 + }, + { + "epoch": 0.1729106628242075, + "grad_norm": 0.5387024283409119, + "learning_rate": 2.9997270164689188e-05, + "loss": 1.3827, + "step": 60 + }, + { + "epoch": 0.1873198847262248, + "grad_norm": 0.612156867980957, + "learning_rate": 2.999082537730771e-05, + "loss": 1.339, + "step": 65 + }, + { + "epoch": 0.2017291066282421, + "grad_norm": 0.6281117796897888, + "learning_rate": 2.998059143587657e-05, + "loss": 1.4007, + "step": 70 + }, + { + "epoch": 0.21613832853025935, + "grad_norm": 0.6070348024368286, + "learning_rate": 2.9966570927176653e-05, + "loss": 1.3111, + "step": 75 + }, + { + "epoch": 0.23054755043227665, + "grad_norm": 0.5962775945663452, + "learning_rate": 2.994876739510005e-05, + "loss": 1.2987, + "step": 80 + }, + { + "epoch": 0.24495677233429394, + "grad_norm": 0.6347147226333618, + "learning_rate": 2.9927185339754245e-05, + "loss": 1.2279, + "step": 85 + }, + { + "epoch": 0.25936599423631124, + "grad_norm": 0.6639678478240967, + "learning_rate": 2.9901830216324694e-05, + "loss": 1.2922, + "step": 90 + }, + { + "epoch": 0.2737752161383285, + "grad_norm": 0.6853729486465454, + "learning_rate": 2.9872708433695907e-05, + "loss": 1.2491, + "step": 95 + }, + { + "epoch": 0.2881844380403458, + "grad_norm": 0.7229520678520203, + "learning_rate": 2.9839827352831522e-05, + "loss": 1.1519, + "step": 100 + }, + { + "epoch": 0.3025936599423631, + "grad_norm": 0.7066729664802551, + "learning_rate": 2.980319528491373e-05, + "loss": 1.1467, + "step": 105 + }, + { + "epoch": 0.3170028818443804, + "grad_norm": 0.8395964503288269, + "learning_rate": 2.976282148924246e-05, + "loss": 1.1499, + "step": 110 + }, + { + "epoch": 0.3314121037463977, + "grad_norm": 0.7626848220825195, + "learning_rate": 2.9718716170894987e-05, + "loss": 1.1234, + "step": 115 + }, + { + "epoch": 0.345821325648415, + "grad_norm": 0.7766785025596619, + "learning_rate": 2.967089047814643e-05, + "loss": 1.0736, + "step": 120 + }, + { + "epoch": 0.36023054755043227, + "grad_norm": 0.7757993340492249, + "learning_rate": 2.961935649965188e-05, + "loss": 1.1457, + "step": 125 + }, + { + "epoch": 0.3746397694524496, + "grad_norm": 0.8008221387863159, + "learning_rate": 2.956412726139078e-05, + "loss": 1.0716, + "step": 130 + }, + { + "epoch": 0.38904899135446686, + "grad_norm": 1.0366390943527222, + "learning_rate": 2.9505216723374442e-05, + "loss": 1.0534, + "step": 135 + }, + { + "epoch": 0.4034582132564842, + "grad_norm": 0.8672937154769897, + "learning_rate": 2.9442639776117436e-05, + "loss": 1.0135, + "step": 140 + }, + { + "epoch": 0.41786743515850144, + "grad_norm": 0.8539049029350281, + "learning_rate": 2.9376412236873792e-05, + "loss": 0.9967, + "step": 145 + }, + { + "epoch": 0.4322766570605187, + "grad_norm": 0.9187260270118713, + "learning_rate": 2.9306550845638953e-05, + "loss": 0.9942, + "step": 150 + }, + { + "epoch": 0.44668587896253603, + "grad_norm": 0.969350278377533, + "learning_rate": 2.9233073260918497e-05, + "loss": 0.9617, + "step": 155 + }, + { + "epoch": 0.4610951008645533, + "grad_norm": 0.9221796989440918, + "learning_rate": 2.9155998055264676e-05, + "loss": 0.9535, + "step": 160 + }, + { + "epoch": 0.4755043227665706, + "grad_norm": 0.9525759220123291, + "learning_rate": 2.907534471058195e-05, + "loss": 0.8988, + "step": 165 + }, + { + "epoch": 0.4899135446685879, + "grad_norm": 0.9378052949905396, + "learning_rate": 2.8991133613202615e-05, + "loss": 0.9131, + "step": 170 + }, + { + "epoch": 0.5043227665706052, + "grad_norm": 1.1599642038345337, + "learning_rate": 2.890338604873387e-05, + "loss": 0.9014, + "step": 175 + }, + { + "epoch": 0.5187319884726225, + "grad_norm": 1.0630452632904053, + "learning_rate": 2.8812124196677585e-05, + "loss": 0.871, + "step": 180 + }, + { + "epoch": 0.5331412103746398, + "grad_norm": 1.0474090576171875, + "learning_rate": 2.871737112482405e-05, + "loss": 0.8208, + "step": 185 + }, + { + "epoch": 0.547550432276657, + "grad_norm": 1.0715168714523315, + "learning_rate": 2.8619150783421303e-05, + "loss": 0.8851, + "step": 190 + }, + { + "epoch": 0.5619596541786743, + "grad_norm": 1.0557371377944946, + "learning_rate": 2.851748799912131e-05, + "loss": 0.8297, + "step": 195 + }, + { + "epoch": 0.5763688760806917, + "grad_norm": 1.0280017852783203, + "learning_rate": 2.8412408468704673e-05, + "loss": 0.8576, + "step": 200 + }, + { + "epoch": 0.590778097982709, + "grad_norm": 0.9882028698921204, + "learning_rate": 2.83039387525854e-05, + "loss": 0.8233, + "step": 205 + }, + { + "epoch": 0.6051873198847262, + "grad_norm": 1.0780398845672607, + "learning_rate": 2.8192106268097336e-05, + "loss": 0.8269, + "step": 210 + }, + { + "epoch": 0.6195965417867435, + "grad_norm": 1.1255983114242554, + "learning_rate": 2.8076939282564054e-05, + "loss": 0.7651, + "step": 215 + }, + { + "epoch": 0.6340057636887608, + "grad_norm": 1.2062652111053467, + "learning_rate": 2.795846690615385e-05, + "loss": 0.6915, + "step": 220 + }, + { + "epoch": 0.6484149855907781, + "grad_norm": 1.2327758073806763, + "learning_rate": 2.7836719084521714e-05, + "loss": 0.7378, + "step": 225 + }, + { + "epoch": 0.6628242074927954, + "grad_norm": 1.170824646949768, + "learning_rate": 2.7711726591240133e-05, + "loss": 0.6678, + "step": 230 + }, + { + "epoch": 0.6772334293948127, + "grad_norm": 1.1761808395385742, + "learning_rate": 2.7583521020020615e-05, + "loss": 0.6466, + "step": 235 + }, + { + "epoch": 0.69164265129683, + "grad_norm": 1.1840074062347412, + "learning_rate": 2.7452134776727875e-05, + "loss": 0.6627, + "step": 240 + }, + { + "epoch": 0.7060518731988472, + "grad_norm": 1.1256996393203735, + "learning_rate": 2.7317601071188823e-05, + "loss": 0.7256, + "step": 245 + }, + { + "epoch": 0.7204610951008645, + "grad_norm": 1.2698858976364136, + "learning_rate": 2.7179953908798246e-05, + "loss": 0.6833, + "step": 250 + }, + { + "epoch": 0.7348703170028819, + "grad_norm": 1.3307738304138184, + "learning_rate": 2.7039228081923448e-05, + "loss": 0.6788, + "step": 255 + }, + { + "epoch": 0.7492795389048992, + "grad_norm": 1.4031965732574463, + "learning_rate": 2.6895459161109978e-05, + "loss": 0.6786, + "step": 260 + }, + { + "epoch": 0.7636887608069164, + "grad_norm": 1.153998613357544, + "learning_rate": 2.6748683486090616e-05, + "loss": 0.6266, + "step": 265 + }, + { + "epoch": 0.7780979827089337, + "grad_norm": 1.157024621963501, + "learning_rate": 2.6598938156600005e-05, + "loss": 0.5759, + "step": 270 + }, + { + "epoch": 0.792507204610951, + "grad_norm": 1.2128347158432007, + "learning_rate": 2.6446261022997098e-05, + "loss": 0.6454, + "step": 275 + }, + { + "epoch": 0.8069164265129684, + "grad_norm": 1.2370502948760986, + "learning_rate": 2.629069067669795e-05, + "loss": 0.5585, + "step": 280 + }, + { + "epoch": 0.8213256484149856, + "grad_norm": 1.215880274772644, + "learning_rate": 2.613226644042114e-05, + "loss": 0.5229, + "step": 285 + }, + { + "epoch": 0.8357348703170029, + "grad_norm": 1.2677791118621826, + "learning_rate": 2.5971028358248396e-05, + "loss": 0.5845, + "step": 290 + }, + { + "epoch": 0.8501440922190202, + "grad_norm": 1.3856490850448608, + "learning_rate": 2.5807017185502833e-05, + "loss": 0.5655, + "step": 295 + }, + { + "epoch": 0.8645533141210374, + "grad_norm": 1.126950979232788, + "learning_rate": 2.5640274378447444e-05, + "loss": 0.5979, + "step": 300 + }, + { + "epoch": 0.8789625360230547, + "grad_norm": 1.3373862504959106, + "learning_rate": 2.5470842083806424e-05, + "loss": 0.6137, + "step": 305 + }, + { + "epoch": 0.8933717579250721, + "grad_norm": 1.2394235134124756, + "learning_rate": 2.5298763128111956e-05, + "loss": 0.5166, + "step": 310 + }, + { + "epoch": 0.9077809798270894, + "grad_norm": 1.1885842084884644, + "learning_rate": 2.5124081006879148e-05, + "loss": 0.499, + "step": 315 + }, + { + "epoch": 0.9221902017291066, + "grad_norm": 1.2524490356445312, + "learning_rate": 2.494683987361193e-05, + "loss": 0.5518, + "step": 320 + }, + { + "epoch": 0.9365994236311239, + "grad_norm": 1.326690673828125, + "learning_rate": 2.4767084528642564e-05, + "loss": 0.5291, + "step": 325 + }, + { + "epoch": 0.9510086455331412, + "grad_norm": 1.1860755681991577, + "learning_rate": 2.458486040780772e-05, + "loss": 0.4727, + "step": 330 + }, + { + "epoch": 0.9654178674351584, + "grad_norm": 1.2763354778289795, + "learning_rate": 2.440021357096388e-05, + "loss": 0.5305, + "step": 335 + }, + { + "epoch": 0.9798270893371758, + "grad_norm": 1.5397248268127441, + "learning_rate": 2.4213190690345018e-05, + "loss": 0.5293, + "step": 340 + }, + { + "epoch": 0.9942363112391931, + "grad_norm": 1.3064489364624023, + "learning_rate": 2.4023839038765525e-05, + "loss": 0.4432, + "step": 345 + }, + { + "epoch": 1.0086455331412103, + "grad_norm": 1.252968430519104, + "learning_rate": 2.383220647767127e-05, + "loss": 0.4197, + "step": 350 + }, + { + "epoch": 1.0230547550432276, + "grad_norm": 1.2820806503295898, + "learning_rate": 2.363834144504192e-05, + "loss": 0.4349, + "step": 355 + }, + { + "epoch": 1.037463976945245, + "grad_norm": 1.2537747621536255, + "learning_rate": 2.3442292943147543e-05, + "loss": 0.4223, + "step": 360 + }, + { + "epoch": 1.0518731988472623, + "grad_norm": 1.270784616470337, + "learning_rate": 2.324411052616251e-05, + "loss": 0.3916, + "step": 365 + }, + { + "epoch": 1.0662824207492796, + "grad_norm": 1.1691981554031372, + "learning_rate": 2.304384428763998e-05, + "loss": 0.4148, + "step": 370 + }, + { + "epoch": 1.080691642651297, + "grad_norm": 1.355738639831543, + "learning_rate": 2.2841544847849994e-05, + "loss": 0.3826, + "step": 375 + }, + { + "epoch": 1.0951008645533142, + "grad_norm": 1.28179931640625, + "learning_rate": 2.2637263340984446e-05, + "loss": 0.425, + "step": 380 + }, + { + "epoch": 1.1095100864553313, + "grad_norm": 1.2490462064743042, + "learning_rate": 2.2431051402232164e-05, + "loss": 0.3724, + "step": 385 + }, + { + "epoch": 1.1239193083573487, + "grad_norm": 1.2858326435089111, + "learning_rate": 2.2222961154727346e-05, + "loss": 0.3282, + "step": 390 + }, + { + "epoch": 1.138328530259366, + "grad_norm": 1.2335901260375977, + "learning_rate": 2.2013045196374645e-05, + "loss": 0.3976, + "step": 395 + }, + { + "epoch": 1.1527377521613833, + "grad_norm": 1.5027825832366943, + "learning_rate": 2.1801356586554298e-05, + "loss": 0.3565, + "step": 400 + }, + { + "epoch": 1.1671469740634006, + "grad_norm": 1.2726131677627563, + "learning_rate": 2.1587948832710557e-05, + "loss": 0.4172, + "step": 405 + }, + { + "epoch": 1.181556195965418, + "grad_norm": 1.2781306505203247, + "learning_rate": 2.1372875876826892e-05, + "loss": 0.3579, + "step": 410 + }, + { + "epoch": 1.195965417867435, + "grad_norm": 1.1754200458526611, + "learning_rate": 2.1156192081791355e-05, + "loss": 0.3685, + "step": 415 + }, + { + "epoch": 1.2103746397694524, + "grad_norm": 1.5389963388442993, + "learning_rate": 2.093795221765554e-05, + "loss": 0.3569, + "step": 420 + }, + { + "epoch": 1.2247838616714697, + "grad_norm": 1.3061515092849731, + "learning_rate": 2.071821144779066e-05, + "loss": 0.3491, + "step": 425 + }, + { + "epoch": 1.239193083573487, + "grad_norm": 1.318978190422058, + "learning_rate": 2.049702531494417e-05, + "loss": 0.3518, + "step": 430 + }, + { + "epoch": 1.2536023054755043, + "grad_norm": 1.2291319370269775, + "learning_rate": 2.0274449727200497e-05, + "loss": 0.306, + "step": 435 + }, + { + "epoch": 1.2680115273775217, + "grad_norm": 1.2856709957122803, + "learning_rate": 2.0050540943849477e-05, + "loss": 0.3592, + "step": 440 + }, + { + "epoch": 1.282420749279539, + "grad_norm": 1.3439631462097168, + "learning_rate": 1.9825355561165953e-05, + "loss": 0.3629, + "step": 445 + }, + { + "epoch": 1.2968299711815563, + "grad_norm": 1.3343507051467896, + "learning_rate": 1.959895049810423e-05, + "loss": 0.3207, + "step": 450 + }, + { + "epoch": 1.3112391930835736, + "grad_norm": 1.2524086236953735, + "learning_rate": 1.937138298191098e-05, + "loss": 0.3581, + "step": 455 + }, + { + "epoch": 1.3256484149855907, + "grad_norm": 1.3714386224746704, + "learning_rate": 1.914271053366018e-05, + "loss": 0.3195, + "step": 460 + }, + { + "epoch": 1.340057636887608, + "grad_norm": 1.1820271015167236, + "learning_rate": 1.8912990953713812e-05, + "loss": 0.2585, + "step": 465 + }, + { + "epoch": 1.3544668587896254, + "grad_norm": 1.315045714378357, + "learning_rate": 1.8682282307111988e-05, + "loss": 0.3124, + "step": 470 + }, + { + "epoch": 1.3688760806916427, + "grad_norm": 1.2485765218734741, + "learning_rate": 1.8450642908896104e-05, + "loss": 0.3033, + "step": 475 + }, + { + "epoch": 1.38328530259366, + "grad_norm": 1.3093189001083374, + "learning_rate": 1.8218131309368876e-05, + "loss": 0.2886, + "step": 480 + }, + { + "epoch": 1.397694524495677, + "grad_norm": 1.1372108459472656, + "learning_rate": 1.798480627929488e-05, + "loss": 0.275, + "step": 485 + }, + { + "epoch": 1.4121037463976944, + "grad_norm": 1.4163148403167725, + "learning_rate": 1.7750726795045345e-05, + "loss": 0.3022, + "step": 490 + }, + { + "epoch": 1.4265129682997117, + "grad_norm": 1.3949346542358398, + "learning_rate": 1.7515952023691022e-05, + "loss": 0.2902, + "step": 495 + }, + { + "epoch": 1.440922190201729, + "grad_norm": 1.2758122682571411, + "learning_rate": 1.728054130804681e-05, + "loss": 0.2917, + "step": 500 + }, + { + "epoch": 1.4553314121037464, + "grad_norm": 1.172297716140747, + "learning_rate": 1.7044554151672003e-05, + "loss": 0.2591, + "step": 505 + }, + { + "epoch": 1.4697406340057637, + "grad_norm": 1.4821935892105103, + "learning_rate": 1.6808050203829845e-05, + "loss": 0.2606, + "step": 510 + }, + { + "epoch": 1.484149855907781, + "grad_norm": 1.2535938024520874, + "learning_rate": 1.657108924441031e-05, + "loss": 0.2647, + "step": 515 + }, + { + "epoch": 1.4985590778097984, + "grad_norm": 1.321158766746521, + "learning_rate": 1.6333731168819854e-05, + "loss": 0.3082, + "step": 520 + }, + { + "epoch": 1.5129682997118157, + "grad_norm": 1.3765157461166382, + "learning_rate": 1.6096035972841937e-05, + "loss": 0.2686, + "step": 525 + }, + { + "epoch": 1.527377521613833, + "grad_norm": 1.2290353775024414, + "learning_rate": 1.5858063737472222e-05, + "loss": 0.3026, + "step": 530 + }, + { + "epoch": 1.54178674351585, + "grad_norm": 1.3295527696609497, + "learning_rate": 1.5619874613732198e-05, + "loss": 0.2544, + "step": 535 + }, + { + "epoch": 1.5561959654178674, + "grad_norm": 1.2324564456939697, + "learning_rate": 1.5381528807465113e-05, + "loss": 0.283, + "step": 540 + }, + { + "epoch": 1.5706051873198847, + "grad_norm": 1.2470685243606567, + "learning_rate": 1.5143086564118042e-05, + "loss": 0.2448, + "step": 545 + }, + { + "epoch": 1.585014409221902, + "grad_norm": 1.29930579662323, + "learning_rate": 1.4904608153513986e-05, + "loss": 0.229, + "step": 550 + }, + { + "epoch": 1.5994236311239192, + "grad_norm": 1.4599519968032837, + "learning_rate": 1.466615385461774e-05, + "loss": 0.2411, + "step": 555 + }, + { + "epoch": 1.6138328530259365, + "grad_norm": 1.342039704322815, + "learning_rate": 1.4427783940299526e-05, + "loss": 0.2284, + "step": 560 + }, + { + "epoch": 1.6282420749279538, + "grad_norm": 1.2873141765594482, + "learning_rate": 1.4189558662100094e-05, + "loss": 0.2541, + "step": 565 + }, + { + "epoch": 1.6426512968299711, + "grad_norm": 1.28057062625885, + "learning_rate": 1.3951538235001262e-05, + "loss": 0.2276, + "step": 570 + }, + { + "epoch": 1.6570605187319885, + "grad_norm": 1.1288738250732422, + "learning_rate": 1.3713782822205703e-05, + "loss": 0.261, + "step": 575 + }, + { + "epoch": 1.6714697406340058, + "grad_norm": 1.3655900955200195, + "learning_rate": 1.3476352519929766e-05, + "loss": 0.2003, + "step": 580 + }, + { + "epoch": 1.685878962536023, + "grad_norm": 1.2466537952423096, + "learning_rate": 1.3239307342213282e-05, + "loss": 0.2069, + "step": 585 + }, + { + "epoch": 1.7002881844380404, + "grad_norm": 1.605360984802246, + "learning_rate": 1.3002707205750142e-05, + "loss": 0.2546, + "step": 590 + }, + { + "epoch": 1.7146974063400577, + "grad_norm": 1.140758991241455, + "learning_rate": 1.2766611914743415e-05, + "loss": 0.2169, + "step": 595 + }, + { + "epoch": 1.729106628242075, + "grad_norm": 1.3513517379760742, + "learning_rate": 1.2531081145788989e-05, + "loss": 0.2036, + "step": 600 + }, + { + "epoch": 1.7435158501440924, + "grad_norm": 1.3215878009796143, + "learning_rate": 1.2296174432791415e-05, + "loss": 0.2084, + "step": 605 + }, + { + "epoch": 1.7579250720461095, + "grad_norm": 1.2709513902664185, + "learning_rate": 1.20619511519158e-05, + "loss": 0.1831, + "step": 610 + }, + { + "epoch": 1.7723342939481268, + "grad_norm": 1.2882943153381348, + "learning_rate": 1.1828470506579631e-05, + "loss": 0.1886, + "step": 615 + }, + { + "epoch": 1.7867435158501441, + "grad_norm": 1.2569106817245483, + "learning_rate": 1.1595791512488213e-05, + "loss": 0.1899, + "step": 620 + }, + { + "epoch": 1.8011527377521612, + "grad_norm": 1.2002679109573364, + "learning_rate": 1.1363972982717588e-05, + "loss": 0.2021, + "step": 625 + }, + { + "epoch": 1.8155619596541785, + "grad_norm": 1.343056559562683, + "learning_rate": 1.1133073512848635e-05, + "loss": 0.1864, + "step": 630 + }, + { + "epoch": 1.8299711815561959, + "grad_norm": 1.4624463319778442, + "learning_rate": 1.090315146615617e-05, + "loss": 0.2094, + "step": 635 + }, + { + "epoch": 1.8443804034582132, + "grad_norm": 1.33236563205719, + "learning_rate": 1.0674264958856779e-05, + "loss": 0.1896, + "step": 640 + }, + { + "epoch": 1.8587896253602305, + "grad_norm": 1.3589786291122437, + "learning_rate": 1.0446471845419063e-05, + "loss": 0.2248, + "step": 645 + }, + { + "epoch": 1.8731988472622478, + "grad_norm": 1.2545641660690308, + "learning_rate": 1.0219829703940047e-05, + "loss": 0.1982, + "step": 650 + }, + { + "epoch": 1.8876080691642652, + "grad_norm": 1.145972490310669, + "learning_rate": 9.994395821591501e-06, + "loss": 0.1837, + "step": 655 + }, + { + "epoch": 1.9020172910662825, + "grad_norm": 1.1126877069473267, + "learning_rate": 9.770227180139727e-06, + "loss": 0.1374, + "step": 660 + }, + { + "epoch": 1.9164265129682998, + "grad_norm": 1.266788125038147, + "learning_rate": 9.54738044154255e-06, + "loss": 0.1724, + "step": 665 + }, + { + "epoch": 1.9308357348703171, + "grad_norm": 1.2325732707977295, + "learning_rate": 9.325911933627228e-06, + "loss": 0.1717, + "step": 670 + }, + { + "epoch": 1.9452449567723344, + "grad_norm": 1.258494257926941, + "learning_rate": 9.10587763585269e-06, + "loss": 0.1735, + "step": 675 + }, + { + "epoch": 1.9596541786743515, + "grad_norm": 1.1921862363815308, + "learning_rate": 8.887333165159921e-06, + "loss": 0.1654, + "step": 680 + }, + { + "epoch": 1.9740634005763689, + "grad_norm": 1.215484380722046, + "learning_rate": 8.67033376191398e-06, + "loss": 0.1685, + "step": 685 + }, + { + "epoch": 1.9884726224783862, + "grad_norm": 1.1556713581085205, + "learning_rate": 8.454934275941129e-06, + "loss": 0.1761, + "step": 690 + }, + { + "epoch": 2.0028818443804033, + "grad_norm": 0.8784791231155396, + "learning_rate": 8.241189152664756e-06, + "loss": 0.1394, + "step": 695 + }, + { + "epoch": 2.0172910662824206, + "grad_norm": 1.1531994342803955, + "learning_rate": 8.029152419343472e-06, + "loss": 0.1401, + "step": 700 + }, + { + "epoch": 2.031700288184438, + "grad_norm": 1.0254112482070923, + "learning_rate": 7.81887767141492e-06, + "loss": 0.1332, + "step": 705 + }, + { + "epoch": 2.0461095100864553, + "grad_norm": 0.8499062061309814, + "learning_rate": 7.6104180589487354e-06, + "loss": 0.1192, + "step": 710 + }, + { + "epoch": 2.0605187319884726, + "grad_norm": 1.0567491054534912, + "learning_rate": 7.403826273212066e-06, + "loss": 0.136, + "step": 715 + }, + { + "epoch": 2.07492795389049, + "grad_norm": 1.1000789403915405, + "learning_rate": 7.199154533351086e-06, + "loss": 0.1123, + "step": 720 + }, + { + "epoch": 2.089337175792507, + "grad_norm": 0.9786854386329651, + "learning_rate": 6.996454573191799e-06, + "loss": 0.127, + "step": 725 + }, + { + "epoch": 2.1037463976945245, + "grad_norm": 1.1830977201461792, + "learning_rate": 6.795777628163599e-06, + "loss": 0.106, + "step": 730 + }, + { + "epoch": 2.118155619596542, + "grad_norm": 1.088281512260437, + "learning_rate": 6.59717442234869e-06, + "loss": 0.1416, + "step": 735 + }, + { + "epoch": 2.132564841498559, + "grad_norm": 1.1635668277740479, + "learning_rate": 6.400695155660866e-06, + "loss": 0.129, + "step": 740 + }, + { + "epoch": 2.1469740634005765, + "grad_norm": 1.4014676809310913, + "learning_rate": 6.2063894911567185e-06, + "loss": 0.1148, + "step": 745 + }, + { + "epoch": 2.161383285302594, + "grad_norm": 1.1724345684051514, + "learning_rate": 6.0143065424825585e-06, + "loss": 0.1214, + "step": 750 + }, + { + "epoch": 2.175792507204611, + "grad_norm": 0.8763135671615601, + "learning_rate": 5.824494861460226e-06, + "loss": 0.1306, + "step": 755 + }, + { + "epoch": 2.1902017291066285, + "grad_norm": 1.2811965942382812, + "learning_rate": 5.6370024258148595e-06, + "loss": 0.1223, + "step": 760 + }, + { + "epoch": 2.2046109510086453, + "grad_norm": 0.9618120789527893, + "learning_rate": 5.451876627047873e-06, + "loss": 0.1111, + "step": 765 + }, + { + "epoch": 2.2190201729106627, + "grad_norm": 1.072068214416504, + "learning_rate": 5.269164258457997e-06, + "loss": 0.1269, + "step": 770 + }, + { + "epoch": 2.23342939481268, + "grad_norm": 0.8783612251281738, + "learning_rate": 5.088911503313577e-06, + "loss": 0.1144, + "step": 775 + }, + { + "epoch": 2.2478386167146973, + "grad_norm": 0.9910514950752258, + "learning_rate": 4.91116392317912e-06, + "loss": 0.1249, + "step": 780 + }, + { + "epoch": 2.2622478386167146, + "grad_norm": 1.0745559930801392, + "learning_rate": 4.735966446398854e-06, + "loss": 0.1134, + "step": 785 + }, + { + "epoch": 2.276657060518732, + "grad_norm": 1.4366180896759033, + "learning_rate": 4.563363356740486e-06, + "loss": 0.1128, + "step": 790 + }, + { + "epoch": 2.2910662824207493, + "grad_norm": 1.259400486946106, + "learning_rate": 4.393398282201788e-06, + "loss": 0.1212, + "step": 795 + }, + { + "epoch": 2.3054755043227666, + "grad_norm": 1.0771541595458984, + "learning_rate": 4.22611418398298e-06, + "loss": 0.1314, + "step": 800 + }, + { + "epoch": 2.319884726224784, + "grad_norm": 1.0824649333953857, + "learning_rate": 4.0615533456276445e-06, + "loss": 0.1153, + "step": 805 + }, + { + "epoch": 2.3342939481268012, + "grad_norm": 0.9429168701171875, + "learning_rate": 3.8997573623349385e-06, + "loss": 0.1201, + "step": 810 + }, + { + "epoch": 2.3487031700288186, + "grad_norm": 0.9275110363960266, + "learning_rate": 3.7407671304457865e-06, + "loss": 0.127, + "step": 815 + }, + { + "epoch": 2.363112391930836, + "grad_norm": 0.9857067465782166, + "learning_rate": 3.584622837105702e-06, + "loss": 0.1078, + "step": 820 + }, + { + "epoch": 2.377521613832853, + "grad_norm": 0.9318557977676392, + "learning_rate": 3.4313639501069423e-06, + "loss": 0.0807, + "step": 825 + }, + { + "epoch": 2.39193083573487, + "grad_norm": 0.97931307554245, + "learning_rate": 3.281029207912364e-06, + "loss": 0.0989, + "step": 830 + }, + { + "epoch": 2.4063400576368874, + "grad_norm": 1.1202027797698975, + "learning_rate": 3.1336566098637553e-06, + "loss": 0.1092, + "step": 835 + }, + { + "epoch": 2.4207492795389047, + "grad_norm": 0.9370639324188232, + "learning_rate": 2.989283406576932e-06, + "loss": 0.1081, + "step": 840 + }, + { + "epoch": 2.435158501440922, + "grad_norm": 0.8955278992652893, + "learning_rate": 2.847946090526056e-06, + "loss": 0.1048, + "step": 845 + }, + { + "epoch": 2.4495677233429394, + "grad_norm": 0.8898526430130005, + "learning_rate": 2.7096803868196546e-06, + "loss": 0.1132, + "step": 850 + }, + { + "epoch": 2.4639769452449567, + "grad_norm": 0.9966161251068115, + "learning_rate": 2.574521244170554e-06, + "loss": 0.1017, + "step": 855 + }, + { + "epoch": 2.478386167146974, + "grad_norm": 0.9917815327644348, + "learning_rate": 2.442502826062072e-06, + "loss": 0.096, + "step": 860 + }, + { + "epoch": 2.4927953890489913, + "grad_norm": 0.7236695289611816, + "learning_rate": 2.3136585021126965e-06, + "loss": 0.1004, + "step": 865 + }, + { + "epoch": 2.5072046109510087, + "grad_norm": 0.8032218813896179, + "learning_rate": 2.1880208396413996e-06, + "loss": 0.0928, + "step": 870 + }, + { + "epoch": 2.521613832853026, + "grad_norm": 0.7975876331329346, + "learning_rate": 2.0656215954358025e-06, + "loss": 0.0938, + "step": 875 + }, + { + "epoch": 2.5360230547550433, + "grad_norm": 0.9391493797302246, + "learning_rate": 1.946491707725122e-06, + "loss": 0.1055, + "step": 880 + }, + { + "epoch": 2.5504322766570606, + "grad_norm": 0.8127132058143616, + "learning_rate": 1.8306612883601193e-06, + "loss": 0.1109, + "step": 885 + }, + { + "epoch": 2.564841498559078, + "grad_norm": 0.8584034442901611, + "learning_rate": 1.718159615201853e-06, + "loss": 0.1069, + "step": 890 + }, + { + "epoch": 2.5792507204610953, + "grad_norm": 1.027357816696167, + "learning_rate": 1.6090151247212814e-06, + "loss": 0.1147, + "step": 895 + }, + { + "epoch": 2.5936599423631126, + "grad_norm": 0.9306102395057678, + "learning_rate": 1.503255404811511e-06, + "loss": 0.1021, + "step": 900 + }, + { + "epoch": 2.60806916426513, + "grad_norm": 0.8479741811752319, + "learning_rate": 1.4009071878145502e-06, + "loss": 0.1052, + "step": 905 + }, + { + "epoch": 2.6224783861671472, + "grad_norm": 1.0757989883422852, + "learning_rate": 1.301996343764319e-06, + "loss": 0.1043, + "step": 910 + }, + { + "epoch": 2.636887608069164, + "grad_norm": 0.7892721891403198, + "learning_rate": 1.2065478738475883e-06, + "loss": 0.0841, + "step": 915 + }, + { + "epoch": 2.6512968299711814, + "grad_norm": 0.8437192440032959, + "learning_rate": 1.1145859040846012e-06, + "loss": 0.0934, + "step": 920 + }, + { + "epoch": 2.6657060518731988, + "grad_norm": 0.8601369857788086, + "learning_rate": 1.0261336792308168e-06, + "loss": 0.0799, + "step": 925 + }, + { + "epoch": 2.680115273775216, + "grad_norm": 0.8176113367080688, + "learning_rate": 9.412135569014807e-07, + "loss": 0.0916, + "step": 930 + }, + { + "epoch": 2.6945244956772334, + "grad_norm": 0.9531677961349487, + "learning_rate": 8.598470019204047e-07, + "loss": 0.1108, + "step": 935 + }, + { + "epoch": 2.7089337175792507, + "grad_norm": 0.8303104043006897, + "learning_rate": 7.820545808943947e-07, + "loss": 0.0974, + "step": 940 + }, + { + "epoch": 2.723342939481268, + "grad_norm": 0.7168823480606079, + "learning_rate": 7.078559570147542e-07, + "loss": 0.0785, + "step": 945 + }, + { + "epoch": 2.7377521613832854, + "grad_norm": 0.729800283908844, + "learning_rate": 6.372698850871101e-07, + "loss": 0.1015, + "step": 950 + }, + { + "epoch": 2.7521613832853027, + "grad_norm": 0.6297175884246826, + "learning_rate": 5.703142067908613e-07, + "loss": 0.0906, + "step": 955 + }, + { + "epoch": 2.76657060518732, + "grad_norm": 0.7142735719680786, + "learning_rate": 5.070058461694261e-07, + "loss": 0.079, + "step": 960 + }, + { + "epoch": 2.7809798270893373, + "grad_norm": 0.6688058376312256, + "learning_rate": 4.4736080535244084e-07, + "loss": 0.0769, + "step": 965 + }, + { + "epoch": 2.795389048991354, + "grad_norm": 0.620917558670044, + "learning_rate": 3.9139416051098053e-07, + "loss": 0.0714, + "step": 970 + }, + { + "epoch": 2.8097982708933715, + "grad_norm": 0.8330137729644775, + "learning_rate": 3.391200580468318e-07, + "loss": 0.1027, + "step": 975 + }, + { + "epoch": 2.824207492795389, + "grad_norm": 0.7449854016304016, + "learning_rate": 2.905517110167899e-07, + "loss": 0.0905, + "step": 980 + }, + { + "epoch": 2.838616714697406, + "grad_norm": 0.7975501418113708, + "learning_rate": 2.457013957928472e-07, + "loss": 0.0921, + "step": 985 + }, + { + "epoch": 2.8530259365994235, + "grad_norm": 0.728843629360199, + "learning_rate": 2.0458044895916516e-07, + "loss": 0.0822, + "step": 990 + }, + { + "epoch": 2.867435158501441, + "grad_norm": 0.6663591265678406, + "learning_rate": 1.6719926444658472e-07, + "loss": 0.1079, + "step": 995 + }, + { + "epoch": 2.881844380403458, + "grad_norm": 0.7752097845077515, + "learning_rate": 1.335672909054081e-07, + "loss": 0.0932, + "step": 1000 + }, + { + "epoch": 2.8962536023054755, + "grad_norm": 0.9080301523208618, + "learning_rate": 1.0369302931710067e-07, + "loss": 0.1012, + "step": 1005 + }, + { + "epoch": 2.910662824207493, + "grad_norm": 0.6191422343254089, + "learning_rate": 7.758403084555499e-08, + "loss": 0.0918, + "step": 1010 + }, + { + "epoch": 2.92507204610951, + "grad_norm": 0.7567998766899109, + "learning_rate": 5.5246894928412396e-08, + "loss": 0.0929, + "step": 1015 + }, + { + "epoch": 2.9394812680115274, + "grad_norm": 0.9494380950927734, + "learning_rate": 3.668726760896246e-08, + "loss": 0.1, + "step": 1020 + }, + { + "epoch": 2.9538904899135447, + "grad_norm": 0.7429489493370056, + "learning_rate": 2.190984010901953e-08, + "loss": 0.0877, + "step": 1025 + }, + { + "epoch": 2.968299711815562, + "grad_norm": 0.901829719543457, + "learning_rate": 1.0918347643146254e-08, + "loss": 0.0965, + "step": 1030 + }, + { + "epoch": 2.9827089337175794, + "grad_norm": 0.87961745262146, + "learning_rate": 3.715568474522146e-09, + "loss": 0.1016, + "step": 1035 + }, + { + "epoch": 2.9971181556195967, + "grad_norm": 0.7325274348258972, + "learning_rate": 3.0332321269865847e-10, + "loss": 0.0982, + "step": 1040 + }, + { + "epoch": 3.0, + "step": 1041, + "total_flos": 1.2938989521139139e+18, + "train_loss": 0.44608225578962896, + "train_runtime": 570.0249, + "train_samples_per_second": 58.424, + "train_steps_per_second": 1.826 + } + ], + "logging_steps": 5, + "max_steps": 1041, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2938989521139139e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..34062bf27075f2a4a10062729881e414f9f605bf --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/1_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c7de76da24cd66d5e1675bd964e8577b91f91572b7dcb07dc367a278e3a3c1 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0f4ec46d94dfbf4802b92bc99254c8773cd02223 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 2_128_e3_3e-5 + results: [] +--- + + + +# 2_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..587c24523502ff354dcf6bf8e57bcc1a667d3d54 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "up_proj", + "down_proj", + "q_proj", + "gate_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79dd8f4fb2aef176bd474bc2c1df2276110fa210 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ebfc0fe3990bdd2d8f6f711020e66112f80d166d77014c3ad0726fc5ade418 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8c3f5f5d7dcb7200522130c39112998c02db91cd --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.208157023321981e+18, + "train_loss": 0.4532986161640928, + "train_runtime": 537.1928, + "train_samples": 10085, + "train_samples_per_second": 56.321, + "train_steps_per_second": 1.765 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8c3f5f5d7dcb7200522130c39112998c02db91cd --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.208157023321981e+18, + "train_loss": 0.4532986161640928, + "train_runtime": 537.1928, + "train_samples": 10085, + "train_samples_per_second": 56.321, + "train_steps_per_second": 1.765 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..74f741ec0d4de309af3d2b6a76a57119d028afe3 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1366 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 948, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01584786053882726, + "grad_norm": 0.7935957312583923, + "learning_rate": 2.4999999999999998e-06, + "loss": 1.5668, + "step": 5 + }, + { + "epoch": 0.03169572107765452, + "grad_norm": 0.6432529091835022, + "learning_rate": 5.625e-06, + "loss": 1.5872, + "step": 10 + }, + { + "epoch": 0.04754358161648178, + "grad_norm": 0.5786689519882202, + "learning_rate": 8.750000000000001e-06, + "loss": 1.5847, + "step": 15 + }, + { + "epoch": 0.06339144215530904, + "grad_norm": 0.4935586750507355, + "learning_rate": 1.1874999999999999e-05, + "loss": 1.6119, + "step": 20 + }, + { + "epoch": 0.07923930269413629, + "grad_norm": 0.4791748523712158, + "learning_rate": 1.5e-05, + "loss": 1.5189, + "step": 25 + }, + { + "epoch": 0.09508716323296355, + "grad_norm": 0.5034691095352173, + "learning_rate": 1.8125e-05, + "loss": 1.5511, + "step": 30 + }, + { + "epoch": 0.1109350237717908, + "grad_norm": 0.4247758984565735, + "learning_rate": 2.125e-05, + "loss": 1.5002, + "step": 35 + }, + { + "epoch": 0.12678288431061807, + "grad_norm": 0.45086759328842163, + "learning_rate": 2.4375e-05, + "loss": 1.4602, + "step": 40 + }, + { + "epoch": 0.14263074484944532, + "grad_norm": 0.4740389287471771, + "learning_rate": 2.75e-05, + "loss": 1.4169, + "step": 45 + }, + { + "epoch": 0.15847860538827258, + "grad_norm": 0.514805257320404, + "learning_rate": 2.9999908614866857e-05, + "loss": 1.4181, + "step": 50 + }, + { + "epoch": 0.17432646592709986, + "grad_norm": 0.5260945558547974, + "learning_rate": 2.9996710252122685e-05, + "loss": 1.4076, + "step": 55 + }, + { + "epoch": 0.1901743264659271, + "grad_norm": 0.5670250058174133, + "learning_rate": 2.9988943746159317e-05, + "loss": 1.3648, + "step": 60 + }, + { + "epoch": 0.20602218700475436, + "grad_norm": 0.5070068836212158, + "learning_rate": 2.9976611462729715e-05, + "loss": 1.3384, + "step": 65 + }, + { + "epoch": 0.2218700475435816, + "grad_norm": 0.6409242153167725, + "learning_rate": 2.995971715836687e-05, + "loss": 1.3523, + "step": 70 + }, + { + "epoch": 0.23771790808240886, + "grad_norm": 0.6170640587806702, + "learning_rate": 2.9938265979239552e-05, + "loss": 1.281, + "step": 75 + }, + { + "epoch": 0.25356576862123614, + "grad_norm": 0.5961747765541077, + "learning_rate": 2.9912264459584732e-05, + "loss": 1.2426, + "step": 80 + }, + { + "epoch": 0.2694136291600634, + "grad_norm": 0.6098426580429077, + "learning_rate": 2.988172051971717e-05, + "loss": 1.2638, + "step": 85 + }, + { + "epoch": 0.28526148969889065, + "grad_norm": 0.6606989502906799, + "learning_rate": 2.9846643463616834e-05, + "loss": 1.2, + "step": 90 + }, + { + "epoch": 0.3011093502377179, + "grad_norm": 0.5983712077140808, + "learning_rate": 2.980704397609482e-05, + "loss": 1.2029, + "step": 95 + }, + { + "epoch": 0.31695721077654515, + "grad_norm": 0.6659141778945923, + "learning_rate": 2.9762934119538628e-05, + "loss": 1.1565, + "step": 100 + }, + { + "epoch": 0.3328050713153724, + "grad_norm": 0.6775039434432983, + "learning_rate": 2.9714327330237873e-05, + "loss": 1.1523, + "step": 105 + }, + { + "epoch": 0.3486529318541997, + "grad_norm": 0.6994650363922119, + "learning_rate": 2.966123841429144e-05, + "loss": 1.1373, + "step": 110 + }, + { + "epoch": 0.36450079239302696, + "grad_norm": 0.7556670308113098, + "learning_rate": 2.9603683543097406e-05, + "loss": 1.1041, + "step": 115 + }, + { + "epoch": 0.3803486529318542, + "grad_norm": 0.7655715346336365, + "learning_rate": 2.9541680248427087e-05, + "loss": 1.0123, + "step": 120 + }, + { + "epoch": 0.39619651347068147, + "grad_norm": 0.7950161099433899, + "learning_rate": 2.9475247417084672e-05, + "loss": 1.0098, + "step": 125 + }, + { + "epoch": 0.4120443740095087, + "grad_norm": 0.8800209760665894, + "learning_rate": 2.9404405285154146e-05, + "loss": 1.041, + "step": 130 + }, + { + "epoch": 0.42789223454833597, + "grad_norm": 0.9518643617630005, + "learning_rate": 2.9329175431835152e-05, + "loss": 1.0278, + "step": 135 + }, + { + "epoch": 0.4437400950871632, + "grad_norm": 0.9128641486167908, + "learning_rate": 2.9249580772869786e-05, + "loss": 1.0163, + "step": 140 + }, + { + "epoch": 0.4595879556259905, + "grad_norm": 0.9705645442008972, + "learning_rate": 2.9165645553562215e-05, + "loss": 0.8437, + "step": 145 + }, + { + "epoch": 0.4754358161648177, + "grad_norm": 1.0577722787857056, + "learning_rate": 2.907739534139334e-05, + "loss": 0.9506, + "step": 150 + }, + { + "epoch": 0.49128367670364503, + "grad_norm": 1.0234817266464233, + "learning_rate": 2.8984857018232682e-05, + "loss": 0.9167, + "step": 155 + }, + { + "epoch": 0.5071315372424723, + "grad_norm": 1.2116918563842773, + "learning_rate": 2.8888058772149923e-05, + "loss": 0.871, + "step": 160 + }, + { + "epoch": 0.5229793977812995, + "grad_norm": 1.11607027053833, + "learning_rate": 2.8787030088828517e-05, + "loss": 0.8295, + "step": 165 + }, + { + "epoch": 0.5388272583201268, + "grad_norm": 0.9830576181411743, + "learning_rate": 2.8681801742584095e-05, + "loss": 0.8536, + "step": 170 + }, + { + "epoch": 0.554675118858954, + "grad_norm": 0.978820264339447, + "learning_rate": 2.8572405786990293e-05, + "loss": 0.8528, + "step": 175 + }, + { + "epoch": 0.5705229793977813, + "grad_norm": 1.1185286045074463, + "learning_rate": 2.8458875545114926e-05, + "loss": 0.7763, + "step": 180 + }, + { + "epoch": 0.5863708399366085, + "grad_norm": 1.193602442741394, + "learning_rate": 2.8341245599369464e-05, + "loss": 0.8648, + "step": 185 + }, + { + "epoch": 0.6022187004754358, + "grad_norm": 0.9558558464050293, + "learning_rate": 2.821955178097488e-05, + "loss": 0.7847, + "step": 190 + }, + { + "epoch": 0.618066561014263, + "grad_norm": 0.9667483568191528, + "learning_rate": 2.8093831159047146e-05, + "loss": 0.7897, + "step": 195 + }, + { + "epoch": 0.6339144215530903, + "grad_norm": 1.0305513143539429, + "learning_rate": 2.7964122029305574e-05, + "loss": 0.8441, + "step": 200 + }, + { + "epoch": 0.6497622820919176, + "grad_norm": 1.1567325592041016, + "learning_rate": 2.78304639024076e-05, + "loss": 0.7726, + "step": 205 + }, + { + "epoch": 0.6656101426307448, + "grad_norm": 0.9978838562965393, + "learning_rate": 2.769289749191346e-05, + "loss": 0.7615, + "step": 210 + }, + { + "epoch": 0.6814580031695721, + "grad_norm": 1.0446453094482422, + "learning_rate": 2.7551464701884426e-05, + "loss": 0.7191, + "step": 215 + }, + { + "epoch": 0.6973058637083994, + "grad_norm": 1.2484354972839355, + "learning_rate": 2.7406208614118427e-05, + "loss": 0.7179, + "step": 220 + }, + { + "epoch": 0.7131537242472267, + "grad_norm": 1.1773909330368042, + "learning_rate": 2.7257173475026926e-05, + "loss": 0.7267, + "step": 225 + }, + { + "epoch": 0.7290015847860539, + "grad_norm": 1.0800909996032715, + "learning_rate": 2.710440468215703e-05, + "loss": 0.6918, + "step": 230 + }, + { + "epoch": 0.7448494453248812, + "grad_norm": 1.2179211378097534, + "learning_rate": 2.6947948770362945e-05, + "loss": 0.6418, + "step": 235 + }, + { + "epoch": 0.7606973058637084, + "grad_norm": 1.0784432888031006, + "learning_rate": 2.678785339763103e-05, + "loss": 0.6643, + "step": 240 + }, + { + "epoch": 0.7765451664025357, + "grad_norm": 1.2810122966766357, + "learning_rate": 2.6624167330562697e-05, + "loss": 0.6634, + "step": 245 + }, + { + "epoch": 0.7923930269413629, + "grad_norm": 1.118651270866394, + "learning_rate": 2.6456940429519637e-05, + "loss": 0.6298, + "step": 250 + }, + { + "epoch": 0.8082408874801902, + "grad_norm": 1.3432537317276, + "learning_rate": 2.6286223633435865e-05, + "loss": 0.5564, + "step": 255 + }, + { + "epoch": 0.8240887480190174, + "grad_norm": 1.159632682800293, + "learning_rate": 2.611206894430125e-05, + "loss": 0.6205, + "step": 260 + }, + { + "epoch": 0.8399366085578447, + "grad_norm": 1.0977771282196045, + "learning_rate": 2.5934529411321174e-05, + "loss": 0.5935, + "step": 265 + }, + { + "epoch": 0.8557844690966719, + "grad_norm": 1.1754155158996582, + "learning_rate": 2.575365911475724e-05, + "loss": 0.5507, + "step": 270 + }, + { + "epoch": 0.8716323296354992, + "grad_norm": 1.2888253927230835, + "learning_rate": 2.556951314945392e-05, + "loss": 0.6145, + "step": 275 + }, + { + "epoch": 0.8874801901743264, + "grad_norm": 1.2895240783691406, + "learning_rate": 2.5382147608056104e-05, + "loss": 0.5967, + "step": 280 + }, + { + "epoch": 0.9033280507131537, + "grad_norm": 1.2376152276992798, + "learning_rate": 2.519161956392275e-05, + "loss": 0.6031, + "step": 285 + }, + { + "epoch": 0.919175911251981, + "grad_norm": 1.241473913192749, + "learning_rate": 2.4997987053741785e-05, + "loss": 0.589, + "step": 290 + }, + { + "epoch": 0.9350237717908082, + "grad_norm": 1.1953359842300415, + "learning_rate": 2.4801309059851586e-05, + "loss": 0.6121, + "step": 295 + }, + { + "epoch": 0.9508716323296355, + "grad_norm": 1.2111769914627075, + "learning_rate": 2.4601645492274334e-05, + "loss": 0.5203, + "step": 300 + }, + { + "epoch": 0.9667194928684627, + "grad_norm": 1.3231160640716553, + "learning_rate": 2.439905717046691e-05, + "loss": 0.5249, + "step": 305 + }, + { + "epoch": 0.9825673534072901, + "grad_norm": 1.2356749773025513, + "learning_rate": 2.419360580479465e-05, + "loss": 0.5198, + "step": 310 + }, + { + "epoch": 0.9984152139461173, + "grad_norm": 1.2111692428588867, + "learning_rate": 2.3985353977733787e-05, + "loss": 0.521, + "step": 315 + }, + { + "epoch": 1.012678288431062, + "grad_norm": 1.3396697044372559, + "learning_rate": 2.3774365124808236e-05, + "loss": 0.3982, + "step": 320 + }, + { + "epoch": 1.0285261489698891, + "grad_norm": 1.215968370437622, + "learning_rate": 2.356070351526648e-05, + "loss": 0.4613, + "step": 325 + }, + { + "epoch": 1.0443740095087164, + "grad_norm": 1.3838987350463867, + "learning_rate": 2.3344434232504573e-05, + "loss": 0.3862, + "step": 330 + }, + { + "epoch": 1.0602218700475436, + "grad_norm": 1.3119263648986816, + "learning_rate": 2.31256231542411e-05, + "loss": 0.4845, + "step": 335 + }, + { + "epoch": 1.076069730586371, + "grad_norm": 1.2162736654281616, + "learning_rate": 2.2904336932450167e-05, + "loss": 0.3764, + "step": 340 + }, + { + "epoch": 1.0919175911251982, + "grad_norm": 1.3342790603637695, + "learning_rate": 2.2680642973058574e-05, + "loss": 0.3696, + "step": 345 + }, + { + "epoch": 1.1077654516640254, + "grad_norm": 1.2232478857040405, + "learning_rate": 2.245460941541333e-05, + "loss": 0.4167, + "step": 350 + }, + { + "epoch": 1.1236133122028527, + "grad_norm": 1.3710737228393555, + "learning_rate": 2.222630511152573e-05, + "loss": 0.4482, + "step": 355 + }, + { + "epoch": 1.13946117274168, + "grad_norm": 1.2606375217437744, + "learning_rate": 2.199579960509837e-05, + "loss": 0.3938, + "step": 360 + }, + { + "epoch": 1.1553090332805072, + "grad_norm": 1.194186806678772, + "learning_rate": 2.176316311034146e-05, + "loss": 0.3649, + "step": 365 + }, + { + "epoch": 1.1711568938193344, + "grad_norm": 1.62546968460083, + "learning_rate": 2.1528466490584914e-05, + "loss": 0.3724, + "step": 370 + }, + { + "epoch": 1.1870047543581617, + "grad_norm": 1.3946568965911865, + "learning_rate": 2.1291781236692657e-05, + "loss": 0.3794, + "step": 375 + }, + { + "epoch": 1.202852614896989, + "grad_norm": 1.3017921447753906, + "learning_rate": 2.1053179445285853e-05, + "loss": 0.3959, + "step": 380 + }, + { + "epoch": 1.2187004754358162, + "grad_norm": 1.2768802642822266, + "learning_rate": 2.0812733796781544e-05, + "loss": 0.3843, + "step": 385 + }, + { + "epoch": 1.2345483359746434, + "grad_norm": 1.3453855514526367, + "learning_rate": 2.0570517533253524e-05, + "loss": 0.3627, + "step": 390 + }, + { + "epoch": 1.2503961965134707, + "grad_norm": 1.2004591226577759, + "learning_rate": 2.0326604436122056e-05, + "loss": 0.3586, + "step": 395 + }, + { + "epoch": 1.266244057052298, + "grad_norm": 1.3465975522994995, + "learning_rate": 2.0081068803679374e-05, + "loss": 0.3589, + "step": 400 + }, + { + "epoch": 1.2820919175911252, + "grad_norm": 1.2242372035980225, + "learning_rate": 1.983398542845767e-05, + "loss": 0.3532, + "step": 405 + }, + { + "epoch": 1.2979397781299524, + "grad_norm": 1.5125539302825928, + "learning_rate": 1.9585429574446588e-05, + "loss": 0.3446, + "step": 410 + }, + { + "epoch": 1.3137876386687797, + "grad_norm": 1.3218587636947632, + "learning_rate": 1.9335476954167073e-05, + "loss": 0.3695, + "step": 415 + }, + { + "epoch": 1.329635499207607, + "grad_norm": 1.3801239728927612, + "learning_rate": 1.9084203705608614e-05, + "loss": 0.3718, + "step": 420 + }, + { + "epoch": 1.3454833597464342, + "grad_norm": 1.3621903657913208, + "learning_rate": 1.883168636903686e-05, + "loss": 0.328, + "step": 425 + }, + { + "epoch": 1.3613312202852614, + "grad_norm": 1.6403703689575195, + "learning_rate": 1.8578001863678713e-05, + "loss": 0.3262, + "step": 430 + }, + { + "epoch": 1.3771790808240887, + "grad_norm": 1.2173627614974976, + "learning_rate": 1.8323227464292014e-05, + "loss": 0.2866, + "step": 435 + }, + { + "epoch": 1.393026941362916, + "grad_norm": 1.3742671012878418, + "learning_rate": 1.8067440777626853e-05, + "loss": 0.2814, + "step": 440 + }, + { + "epoch": 1.4088748019017432, + "grad_norm": 1.2998393774032593, + "learning_rate": 1.781071971878587e-05, + "loss": 0.3016, + "step": 445 + }, + { + "epoch": 1.4247226624405704, + "grad_norm": 1.237502932548523, + "learning_rate": 1.7553142487490488e-05, + "loss": 0.2772, + "step": 450 + }, + { + "epoch": 1.4405705229793977, + "grad_norm": 1.2581987380981445, + "learning_rate": 1.7294787544260573e-05, + "loss": 0.3178, + "step": 455 + }, + { + "epoch": 1.456418383518225, + "grad_norm": 1.4347732067108154, + "learning_rate": 1.7035733586514568e-05, + "loss": 0.2505, + "step": 460 + }, + { + "epoch": 1.4722662440570522, + "grad_norm": 1.2100082635879517, + "learning_rate": 1.6776059524597518e-05, + "loss": 0.2859, + "step": 465 + }, + { + "epoch": 1.4881141045958794, + "grad_norm": 1.2984610795974731, + "learning_rate": 1.6515844457744193e-05, + "loss": 0.2514, + "step": 470 + }, + { + "epoch": 1.5039619651347067, + "grad_norm": 1.2285469770431519, + "learning_rate": 1.6255167649984738e-05, + "loss": 0.2375, + "step": 475 + }, + { + "epoch": 1.519809825673534, + "grad_norm": 1.226462721824646, + "learning_rate": 1.5994108506000005e-05, + "loss": 0.2715, + "step": 480 + }, + { + "epoch": 1.5356576862123612, + "grad_norm": 1.192294955253601, + "learning_rate": 1.57327465469342e-05, + "loss": 0.3177, + "step": 485 + }, + { + "epoch": 1.5515055467511885, + "grad_norm": 1.2200206518173218, + "learning_rate": 1.5471161386171925e-05, + "loss": 0.2597, + "step": 490 + }, + { + "epoch": 1.5673534072900157, + "grad_norm": 1.457835078239441, + "learning_rate": 1.5209432705087183e-05, + "loss": 0.2704, + "step": 495 + }, + { + "epoch": 1.583201267828843, + "grad_norm": 1.324806571006775, + "learning_rate": 1.494764022877165e-05, + "loss": 0.2291, + "step": 500 + }, + { + "epoch": 1.5990491283676702, + "grad_norm": 1.1390082836151123, + "learning_rate": 1.4685863701749648e-05, + "loss": 0.2313, + "step": 505 + }, + { + "epoch": 1.6148969889064975, + "grad_norm": 1.1324515342712402, + "learning_rate": 1.4424182863687201e-05, + "loss": 0.2233, + "step": 510 + }, + { + "epoch": 1.6307448494453247, + "grad_norm": 1.2309553623199463, + "learning_rate": 1.4162677425102542e-05, + "loss": 0.2314, + "step": 515 + }, + { + "epoch": 1.6465927099841522, + "grad_norm": 1.2059212923049927, + "learning_rate": 1.3901427043085528e-05, + "loss": 0.226, + "step": 520 + }, + { + "epoch": 1.6624405705229794, + "grad_norm": 1.3043513298034668, + "learning_rate": 1.36405112970333e-05, + "loss": 0.1997, + "step": 525 + }, + { + "epoch": 1.6782884310618067, + "grad_norm": 1.2730720043182373, + "learning_rate": 1.3380009664409656e-05, + "loss": 0.2836, + "step": 530 + }, + { + "epoch": 1.694136291600634, + "grad_norm": 1.1967113018035889, + "learning_rate": 1.3120001496535434e-05, + "loss": 0.2228, + "step": 535 + }, + { + "epoch": 1.7099841521394612, + "grad_norm": 1.489896535873413, + "learning_rate": 1.2860565994417327e-05, + "loss": 0.2137, + "step": 540 + }, + { + "epoch": 1.7258320126782885, + "grad_norm": 1.4086004495620728, + "learning_rate": 1.2601782184622479e-05, + "loss": 0.2248, + "step": 545 + }, + { + "epoch": 1.7416798732171157, + "grad_norm": 1.5329160690307617, + "learning_rate": 1.2343728895206252e-05, + "loss": 0.2182, + "step": 550 + }, + { + "epoch": 1.757527733755943, + "grad_norm": 1.2316392660140991, + "learning_rate": 1.2086484731700424e-05, + "loss": 0.2135, + "step": 555 + }, + { + "epoch": 1.7733755942947702, + "grad_norm": 1.487317681312561, + "learning_rate": 1.1830128053169172e-05, + "loss": 0.2203, + "step": 560 + }, + { + "epoch": 1.7892234548335975, + "grad_norm": 1.2591822147369385, + "learning_rate": 1.1574736948340163e-05, + "loss": 0.2129, + "step": 565 + }, + { + "epoch": 1.8050713153724247, + "grad_norm": 1.2491317987442017, + "learning_rate": 1.132038921181796e-05, + "loss": 0.199, + "step": 570 + }, + { + "epoch": 1.820919175911252, + "grad_norm": 1.2943191528320312, + "learning_rate": 1.1067162320387032e-05, + "loss": 0.1902, + "step": 575 + }, + { + "epoch": 1.8367670364500792, + "grad_norm": 1.2057480812072754, + "learning_rate": 1.0815133409411564e-05, + "loss": 0.1706, + "step": 580 + }, + { + "epoch": 1.8526148969889065, + "grad_norm": 1.2110016345977783, + "learning_rate": 1.0564379249339306e-05, + "loss": 0.1982, + "step": 585 + }, + { + "epoch": 1.8684627575277337, + "grad_norm": 1.1958625316619873, + "learning_rate": 1.031497622231651e-05, + "loss": 0.1958, + "step": 590 + }, + { + "epoch": 1.884310618066561, + "grad_norm": 1.0796982049942017, + "learning_rate": 1.0067000298921251e-05, + "loss": 0.2132, + "step": 595 + }, + { + "epoch": 1.9001584786053882, + "grad_norm": 0.9155579209327698, + "learning_rate": 9.820527015021981e-06, + "loss": 0.174, + "step": 600 + }, + { + "epoch": 1.9160063391442155, + "grad_norm": 1.301171064376831, + "learning_rate": 9.575631448768618e-06, + "loss": 0.1666, + "step": 605 + }, + { + "epoch": 1.931854199683043, + "grad_norm": 1.1284383535385132, + "learning_rate": 9.332388197722995e-06, + "loss": 0.1674, + "step": 610 + }, + { + "epoch": 1.9477020602218702, + "grad_norm": 1.0448647737503052, + "learning_rate": 9.090871356135733e-06, + "loss": 0.1645, + "step": 615 + }, + { + "epoch": 1.9635499207606975, + "grad_norm": 1.199547529220581, + "learning_rate": 8.851154492376408e-06, + "loss": 0.1789, + "step": 620 + }, + { + "epoch": 1.9793977812995247, + "grad_norm": 1.3927373886108398, + "learning_rate": 8.61331062652391e-06, + "loss": 0.2043, + "step": 625 + }, + { + "epoch": 1.995245641838352, + "grad_norm": 1.167506456375122, + "learning_rate": 8.377412208123823e-06, + "loss": 0.1732, + "step": 630 + }, + { + "epoch": 2.0095087163232965, + "grad_norm": 1.1593416929244995, + "learning_rate": 8.143531094119591e-06, + "loss": 0.1542, + "step": 635 + }, + { + "epoch": 2.025356576862124, + "grad_norm": 1.1884214878082275, + "learning_rate": 7.911738526964192e-06, + "loss": 0.1352, + "step": 640 + }, + { + "epoch": 2.041204437400951, + "grad_norm": 1.1753219366073608, + "learning_rate": 7.682105112919007e-06, + "loss": 0.1261, + "step": 645 + }, + { + "epoch": 2.0570522979397783, + "grad_norm": 0.9288991093635559, + "learning_rate": 7.454700800546474e-06, + "loss": 0.1527, + "step": 650 + }, + { + "epoch": 2.0729001584786055, + "grad_norm": 1.4360142946243286, + "learning_rate": 7.229594859403049e-06, + "loss": 0.146, + "step": 655 + }, + { + "epoch": 2.088748019017433, + "grad_norm": 1.0506342649459839, + "learning_rate": 7.006855858939049e-06, + "loss": 0.1368, + "step": 660 + }, + { + "epoch": 2.10459587955626, + "grad_norm": 1.0200011730194092, + "learning_rate": 6.7865516476117475e-06, + "loss": 0.1199, + "step": 665 + }, + { + "epoch": 2.1204437400950873, + "grad_norm": 1.3693517446517944, + "learning_rate": 6.568749332218045e-06, + "loss": 0.1451, + "step": 670 + }, + { + "epoch": 2.1362916006339145, + "grad_norm": 1.1203068494796753, + "learning_rate": 6.3535152574531025e-06, + "loss": 0.1234, + "step": 675 + }, + { + "epoch": 2.152139461172742, + "grad_norm": 1.002809762954712, + "learning_rate": 6.1409149857011175e-06, + "loss": 0.127, + "step": 680 + }, + { + "epoch": 2.167987321711569, + "grad_norm": 0.9112873673439026, + "learning_rate": 5.931013277064377e-06, + "loss": 0.1251, + "step": 685 + }, + { + "epoch": 2.1838351822503963, + "grad_norm": 1.123333215713501, + "learning_rate": 5.72387406963669e-06, + "loss": 0.125, + "step": 690 + }, + { + "epoch": 2.1996830427892236, + "grad_norm": 1.0192673206329346, + "learning_rate": 5.519560460027236e-06, + "loss": 0.1347, + "step": 695 + }, + { + "epoch": 2.215530903328051, + "grad_norm": 1.136596918106079, + "learning_rate": 5.318134684140739e-06, + "loss": 0.1521, + "step": 700 + }, + { + "epoch": 2.231378763866878, + "grad_norm": 1.0516589879989624, + "learning_rate": 5.119658098219791e-06, + "loss": 0.1294, + "step": 705 + }, + { + "epoch": 2.2472266244057053, + "grad_norm": 0.9626486301422119, + "learning_rate": 4.92419116015515e-06, + "loss": 0.1122, + "step": 710 + }, + { + "epoch": 2.2630744849445326, + "grad_norm": 1.2495557069778442, + "learning_rate": 4.731793411069669e-06, + "loss": 0.1243, + "step": 715 + }, + { + "epoch": 2.27892234548336, + "grad_norm": 0.9899680018424988, + "learning_rate": 4.5425234571815335e-06, + "loss": 0.1075, + "step": 720 + }, + { + "epoch": 2.294770206022187, + "grad_norm": 0.8825737237930298, + "learning_rate": 4.356438951952189e-06, + "loss": 0.1161, + "step": 725 + }, + { + "epoch": 2.3106180665610143, + "grad_norm": 1.0125575065612793, + "learning_rate": 4.173596578524568e-06, + "loss": 0.0976, + "step": 730 + }, + { + "epoch": 2.3264659270998416, + "grad_norm": 0.8015884160995483, + "learning_rate": 3.994052032456853e-06, + "loss": 0.1036, + "step": 735 + }, + { + "epoch": 2.342313787638669, + "grad_norm": 0.8794209361076355, + "learning_rate": 3.8178600047570675e-06, + "loss": 0.1295, + "step": 740 + }, + { + "epoch": 2.358161648177496, + "grad_norm": 0.9067302346229553, + "learning_rate": 3.645074165223656e-06, + "loss": 0.1166, + "step": 745 + }, + { + "epoch": 2.3740095087163233, + "grad_norm": 1.4430241584777832, + "learning_rate": 3.475747146097153e-06, + "loss": 0.1046, + "step": 750 + }, + { + "epoch": 2.3898573692551506, + "grad_norm": 1.0335779190063477, + "learning_rate": 3.309930526027885e-06, + "loss": 0.1177, + "step": 755 + }, + { + "epoch": 2.405705229793978, + "grad_norm": 1.0350884199142456, + "learning_rate": 3.1476748143646437e-06, + "loss": 0.0955, + "step": 760 + }, + { + "epoch": 2.421553090332805, + "grad_norm": 0.8884088397026062, + "learning_rate": 2.9890294357689994e-06, + "loss": 0.097, + "step": 765 + }, + { + "epoch": 2.4374009508716323, + "grad_norm": 0.9744959473609924, + "learning_rate": 2.8340427151601036e-06, + "loss": 0.0934, + "step": 770 + }, + { + "epoch": 2.4532488114104596, + "grad_norm": 0.7675307989120483, + "learning_rate": 2.6827618629944394e-06, + "loss": 0.0995, + "step": 775 + }, + { + "epoch": 2.469096671949287, + "grad_norm": 1.062865138053894, + "learning_rate": 2.5352329608850783e-06, + "loss": 0.0976, + "step": 780 + }, + { + "epoch": 2.484944532488114, + "grad_norm": 0.8599324226379395, + "learning_rate": 2.3915009475647364e-06, + "loss": 0.105, + "step": 785 + }, + { + "epoch": 2.5007923930269413, + "grad_norm": 0.8323907852172852, + "learning_rate": 2.2516096051970438e-06, + "loss": 0.0947, + "step": 790 + }, + { + "epoch": 2.5166402535657686, + "grad_norm": 0.8331909775733948, + "learning_rate": 2.1156015460400333e-06, + "loss": 0.1007, + "step": 795 + }, + { + "epoch": 2.532488114104596, + "grad_norm": 0.7486231923103333, + "learning_rate": 1.9835181994660754e-06, + "loss": 0.0944, + "step": 800 + }, + { + "epoch": 2.548335974643423, + "grad_norm": 0.7970870733261108, + "learning_rate": 1.8553997993420495e-06, + "loss": 0.0976, + "step": 805 + }, + { + "epoch": 2.5641838351822503, + "grad_norm": 0.8264245390892029, + "learning_rate": 1.731285371773741e-06, + "loss": 0.1, + "step": 810 + }, + { + "epoch": 2.5800316957210776, + "grad_norm": 0.9105907678604126, + "learning_rate": 1.6112127232181163e-06, + "loss": 0.0979, + "step": 815 + }, + { + "epoch": 2.595879556259905, + "grad_norm": 0.8460378646850586, + "learning_rate": 1.4952184289670974e-06, + "loss": 0.0856, + "step": 820 + }, + { + "epoch": 2.611727416798732, + "grad_norm": 0.7376194000244141, + "learning_rate": 1.3833378220063713e-06, + "loss": 0.102, + "step": 825 + }, + { + "epoch": 2.6275752773375594, + "grad_norm": 0.7583907246589661, + "learning_rate": 1.2756049822526288e-06, + "loss": 0.0908, + "step": 830 + }, + { + "epoch": 2.6434231378763866, + "grad_norm": 0.8942042589187622, + "learning_rate": 1.172052726172494e-06, + "loss": 0.1116, + "step": 835 + }, + { + "epoch": 2.659270998415214, + "grad_norm": 0.8507908582687378, + "learning_rate": 1.0727125967862972e-06, + "loss": 0.0966, + "step": 840 + }, + { + "epoch": 2.675118858954041, + "grad_norm": 0.7428611516952515, + "learning_rate": 9.776148540597834e-07, + "loss": 0.0918, + "step": 845 + }, + { + "epoch": 2.6909667194928684, + "grad_norm": 0.7430552244186401, + "learning_rate": 8.867884656866181e-07, + "loss": 0.0937, + "step": 850 + }, + { + "epoch": 2.7068145800316956, + "grad_norm": 0.746529221534729, + "learning_rate": 8.002610982645558e-07, + "loss": 0.081, + "step": 855 + }, + { + "epoch": 2.722662440570523, + "grad_norm": 0.9930717349052429, + "learning_rate": 7.180591088679212e-07, + "loss": 0.0928, + "step": 860 + }, + { + "epoch": 2.73851030110935, + "grad_norm": 0.779670238494873, + "learning_rate": 6.402075370189914e-07, + "loss": 0.0968, + "step": 865 + }, + { + "epoch": 2.7543581616481774, + "grad_norm": 0.7826471924781799, + "learning_rate": 5.667300970607192e-07, + "loss": 0.1115, + "step": 870 + }, + { + "epoch": 2.7702060221870046, + "grad_norm": 0.9204393029212952, + "learning_rate": 4.976491709331094e-07, + "loss": 0.0904, + "step": 875 + }, + { + "epoch": 2.786053882725832, + "grad_norm": 0.7752525806427002, + "learning_rate": 4.3298580135546053e-07, + "loss": 0.0894, + "step": 880 + }, + { + "epoch": 2.801901743264659, + "grad_norm": 0.8384532928466797, + "learning_rate": 3.7275968541655104e-07, + "loss": 0.0945, + "step": 885 + }, + { + "epoch": 2.8177496038034864, + "grad_norm": 0.8098379969596863, + "learning_rate": 3.16989168574725e-07, + "loss": 0.0976, + "step": 890 + }, + { + "epoch": 2.8335974643423136, + "grad_norm": 0.9598178267478943, + "learning_rate": 2.6569123906967083e-07, + "loss": 0.1096, + "step": 895 + }, + { + "epoch": 2.849445324881141, + "grad_norm": 0.7886707782745361, + "learning_rate": 2.1888152274764872e-07, + "loss": 0.0873, + "step": 900 + }, + { + "epoch": 2.8652931854199686, + "grad_norm": 0.761610746383667, + "learning_rate": 1.7657427830170824e-07, + "loss": 0.1093, + "step": 905 + }, + { + "epoch": 2.8811410459587954, + "grad_norm": 0.6591787338256836, + "learning_rate": 1.3878239292834604e-07, + "loss": 0.1014, + "step": 910 + }, + { + "epoch": 2.896988906497623, + "grad_norm": 0.6273005604743958, + "learning_rate": 1.0551737840194587e-07, + "loss": 0.0822, + "step": 915 + }, + { + "epoch": 2.91283676703645, + "grad_norm": 0.8146249651908875, + "learning_rate": 7.67893675681769e-08, + "loss": 0.0885, + "step": 920 + }, + { + "epoch": 2.9286846275752776, + "grad_norm": 0.756746768951416, + "learning_rate": 5.260711125743445e-08, + "loss": 0.1009, + "step": 925 + }, + { + "epoch": 2.9445324881141044, + "grad_norm": 0.7030237913131714, + "learning_rate": 3.2977975619250536e-08, + "loss": 0.0856, + "step": 930 + }, + { + "epoch": 2.960380348652932, + "grad_norm": 0.6340619921684265, + "learning_rate": 1.7907939878490376e-08, + "loss": 0.0983, + "step": 935 + }, + { + "epoch": 2.976228209191759, + "grad_norm": 0.6975314617156982, + "learning_rate": 7.401594514026e-09, + "loss": 0.1015, + "step": 940 + }, + { + "epoch": 2.9920760697305866, + "grad_norm": 0.804392397403717, + "learning_rate": 1.4621398604364179e-09, + "loss": 0.0933, + "step": 945 + }, + { + "epoch": 3.0, + "step": 948, + "total_flos": 1.208157023321981e+18, + "train_loss": 0.4532986161640928, + "train_runtime": 537.1928, + "train_samples_per_second": 56.321, + "train_steps_per_second": 1.765 + } + ], + "logging_steps": 5, + "max_steps": 948, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.208157023321981e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..16ce6b082d1d711b4f8645ea747ba820e270b2da --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/2_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed9c22661c203ffab853696b91a5aaa5fe31c5fbd00ff05decb5091e720cccc +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfdde68b238b5b7af85a37f766a1af8922ee3d3f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 3_128_e3_3e-5 + results: [] +--- + + + +# 3_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c06023ace1da00474a3b05ee41fef225a7ec5b99 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "q_proj", + "down_proj", + "gate_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0a1d5a4fe54fa7fbd7474de7935d94c4319efee --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9411c8fd79151bbfe8c79505e78e3c0735d00631610429c814641636e28ade +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eb7c634a34445e5913c00fea7b5503f37ad94024 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3890291186532352e+18, + "train_loss": 0.43522213716975977, + "train_runtime": 592.3344, + "train_samples": 11694, + "train_samples_per_second": 59.227, + "train_steps_per_second": 1.854 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eb7c634a34445e5913c00fea7b5503f37ad94024 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3890291186532352e+18, + "train_loss": 0.43522213716975977, + "train_runtime": 592.3344, + "train_samples": 11694, + "train_samples_per_second": 59.227, + "train_steps_per_second": 1.854 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7e472897af8fdbdfb1000723674effa717ee35a2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1576 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1098, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013679890560875513, + "grad_norm": 0.6185965538024902, + "learning_rate": 2.181818181818182e-06, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.027359781121751026, + "grad_norm": 0.8445557355880737, + "learning_rate": 4.90909090909091e-06, + "loss": 1.6318, + "step": 10 + }, + { + "epoch": 0.04103967168262654, + "grad_norm": 0.8401841521263123, + "learning_rate": 7.636363636363636e-06, + "loss": 1.6271, + "step": 15 + }, + { + "epoch": 0.05471956224350205, + "grad_norm": 0.4916505813598633, + "learning_rate": 1.0363636363636364e-05, + "loss": 1.5634, + "step": 20 + }, + { + "epoch": 0.06839945280437756, + "grad_norm": 0.5140190124511719, + "learning_rate": 1.309090909090909e-05, + "loss": 1.5537, + "step": 25 + }, + { + "epoch": 0.08207934336525308, + "grad_norm": 0.5175161361694336, + "learning_rate": 1.5818181818181818e-05, + "loss": 1.5249, + "step": 30 + }, + { + "epoch": 0.09575923392612859, + "grad_norm": 0.4762283265590668, + "learning_rate": 1.8545454545454545e-05, + "loss": 1.4767, + "step": 35 + }, + { + "epoch": 0.1094391244870041, + "grad_norm": 0.48895174264907837, + "learning_rate": 2.1272727272727273e-05, + "loss": 1.4616, + "step": 40 + }, + { + "epoch": 0.12311901504787962, + "grad_norm": 0.5172759890556335, + "learning_rate": 2.4e-05, + "loss": 1.4579, + "step": 45 + }, + { + "epoch": 0.13679890560875513, + "grad_norm": 0.5437483191490173, + "learning_rate": 2.6727272727272728e-05, + "loss": 1.4686, + "step": 50 + }, + { + "epoch": 0.15047879616963064, + "grad_norm": 0.5608171224594116, + "learning_rate": 2.9454545454545456e-05, + "loss": 1.39, + "step": 55 + }, + { + "epoch": 0.16415868673050615, + "grad_norm": 0.5453359484672546, + "learning_rate": 2.9998911302762293e-05, + "loss": 1.3925, + "step": 60 + }, + { + "epoch": 0.17783857729138167, + "grad_norm": 0.644840657711029, + "learning_rate": 2.9994488741083514e-05, + "loss": 1.3773, + "step": 65 + }, + { + "epoch": 0.19151846785225718, + "grad_norm": 0.5121015310287476, + "learning_rate": 2.9986665273697548e-05, + "loss": 1.3842, + "step": 70 + }, + { + "epoch": 0.2051983584131327, + "grad_norm": 0.6676328182220459, + "learning_rate": 2.997544267504801e-05, + "loss": 1.2908, + "step": 75 + }, + { + "epoch": 0.2188782489740082, + "grad_norm": 0.6694561839103699, + "learning_rate": 2.9960823490536772e-05, + "loss": 1.3257, + "step": 80 + }, + { + "epoch": 0.23255813953488372, + "grad_norm": 0.8120052814483643, + "learning_rate": 2.9942811035946656e-05, + "loss": 1.2997, + "step": 85 + }, + { + "epoch": 0.24623803009575923, + "grad_norm": 0.6103828549385071, + "learning_rate": 2.9921409396689347e-05, + "loss": 1.276, + "step": 90 + }, + { + "epoch": 0.25991792065663477, + "grad_norm": 0.6659409403800964, + "learning_rate": 2.9896623426878805e-05, + "loss": 1.2622, + "step": 95 + }, + { + "epoch": 0.27359781121751026, + "grad_norm": 0.7107391357421875, + "learning_rate": 2.9868458748230293e-05, + "loss": 1.1911, + "step": 100 + }, + { + "epoch": 0.2872777017783858, + "grad_norm": 0.8371039032936096, + "learning_rate": 2.983692174878531e-05, + "loss": 1.17, + "step": 105 + }, + { + "epoch": 0.3009575923392613, + "grad_norm": 0.7415754795074463, + "learning_rate": 2.980201958146272e-05, + "loss": 1.1435, + "step": 110 + }, + { + "epoch": 0.3146374829001368, + "grad_norm": 0.8000269532203674, + "learning_rate": 2.9763760162436405e-05, + "loss": 1.1341, + "step": 115 + }, + { + "epoch": 0.3283173734610123, + "grad_norm": 0.7828715443611145, + "learning_rate": 2.9722152169339765e-05, + "loss": 1.1428, + "step": 120 + }, + { + "epoch": 0.34199726402188785, + "grad_norm": 0.8536815643310547, + "learning_rate": 2.967720503929759e-05, + "loss": 1.0189, + "step": 125 + }, + { + "epoch": 0.35567715458276333, + "grad_norm": 0.926136314868927, + "learning_rate": 2.962892896678557e-05, + "loss": 1.0555, + "step": 130 + }, + { + "epoch": 0.3693570451436389, + "grad_norm": 0.846136212348938, + "learning_rate": 2.9577334901318115e-05, + "loss": 1.0685, + "step": 135 + }, + { + "epoch": 0.38303693570451436, + "grad_norm": 1.0851120948791504, + "learning_rate": 2.952243454496488e-05, + "loss": 1.0467, + "step": 140 + }, + { + "epoch": 0.3967168262653899, + "grad_norm": 0.9068893194198608, + "learning_rate": 2.9464240349696625e-05, + "loss": 0.986, + "step": 145 + }, + { + "epoch": 0.4103967168262654, + "grad_norm": 0.866257905960083, + "learning_rate": 2.9402765514560955e-05, + "loss": 0.93, + "step": 150 + }, + { + "epoch": 0.4240766073871409, + "grad_norm": 0.9289224743843079, + "learning_rate": 2.9338023982688657e-05, + "loss": 0.9395, + "step": 155 + }, + { + "epoch": 0.4377564979480164, + "grad_norm": 0.9241721630096436, + "learning_rate": 2.9270030438131263e-05, + "loss": 0.893, + "step": 160 + }, + { + "epoch": 0.45143638850889195, + "grad_norm": 0.9470856189727783, + "learning_rate": 2.9198800302530532e-05, + "loss": 0.8896, + "step": 165 + }, + { + "epoch": 0.46511627906976744, + "grad_norm": 1.3443787097930908, + "learning_rate": 2.912434973162067e-05, + "loss": 0.8985, + "step": 170 + }, + { + "epoch": 0.478796169630643, + "grad_norm": 0.9911470413208008, + "learning_rate": 2.904669561156404e-05, + "loss": 0.94, + "step": 175 + }, + { + "epoch": 0.49247606019151846, + "grad_norm": 0.9000107049942017, + "learning_rate": 2.8965855555121216e-05, + "loss": 0.858, + "step": 180 + }, + { + "epoch": 0.506155950752394, + "grad_norm": 0.9684233069419861, + "learning_rate": 2.8881847897656224e-05, + "loss": 0.8742, + "step": 185 + }, + { + "epoch": 0.5198358413132695, + "grad_norm": 1.1200600862503052, + "learning_rate": 2.879469169297787e-05, + "loss": 0.8724, + "step": 190 + }, + { + "epoch": 0.533515731874145, + "grad_norm": 1.0766150951385498, + "learning_rate": 2.870440670901816e-05, + "loss": 0.8682, + "step": 195 + }, + { + "epoch": 0.5471956224350205, + "grad_norm": 1.0326838493347168, + "learning_rate": 2.8611013423348727e-05, + "loss": 0.7933, + "step": 200 + }, + { + "epoch": 0.560875512995896, + "grad_norm": 0.9329909682273865, + "learning_rate": 2.8514533018536286e-05, + "loss": 0.7966, + "step": 205 + }, + { + "epoch": 0.5745554035567716, + "grad_norm": 1.118505597114563, + "learning_rate": 2.841498737733824e-05, + "loss": 0.7689, + "step": 210 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.223893404006958, + "learning_rate": 2.8312399077739407e-05, + "loss": 0.7701, + "step": 215 + }, + { + "epoch": 0.6019151846785226, + "grad_norm": 1.184329628944397, + "learning_rate": 2.8206791387831136e-05, + "loss": 0.7596, + "step": 220 + }, + { + "epoch": 0.615595075239398, + "grad_norm": 1.1045267581939697, + "learning_rate": 2.8098188260533818e-05, + "loss": 0.7888, + "step": 225 + }, + { + "epoch": 0.6292749658002736, + "grad_norm": 1.1049416065216064, + "learning_rate": 2.7986614328164168e-05, + "loss": 0.693, + "step": 230 + }, + { + "epoch": 0.6429548563611491, + "grad_norm": 1.0553362369537354, + "learning_rate": 2.7872094896848307e-05, + "loss": 0.6748, + "step": 235 + }, + { + "epoch": 0.6566347469220246, + "grad_norm": 1.1535810232162476, + "learning_rate": 2.7754655940782117e-05, + "loss": 0.6598, + "step": 240 + }, + { + "epoch": 0.6703146374829001, + "grad_norm": 1.029013991355896, + "learning_rate": 2.763432409633998e-05, + "loss": 0.6861, + "step": 245 + }, + { + "epoch": 0.6839945280437757, + "grad_norm": 1.2215471267700195, + "learning_rate": 2.751112665603341e-05, + "loss": 0.6873, + "step": 250 + }, + { + "epoch": 0.6976744186046512, + "grad_norm": 1.1955983638763428, + "learning_rate": 2.7385091562320808e-05, + "loss": 0.6656, + "step": 255 + }, + { + "epoch": 0.7113543091655267, + "grad_norm": 1.0637106895446777, + "learning_rate": 2.7256247401269814e-05, + "loss": 0.6352, + "step": 260 + }, + { + "epoch": 0.7250341997264022, + "grad_norm": 1.2128502130508423, + "learning_rate": 2.7124623396073715e-05, + "loss": 0.6501, + "step": 265 + }, + { + "epoch": 0.7387140902872777, + "grad_norm": 1.143717646598816, + "learning_rate": 2.6990249400423305e-05, + "loss": 0.6701, + "step": 270 + }, + { + "epoch": 0.7523939808481532, + "grad_norm": 1.0508838891983032, + "learning_rate": 2.685315589173576e-05, + "loss": 0.6546, + "step": 275 + }, + { + "epoch": 0.7660738714090287, + "grad_norm": 1.1054850816726685, + "learning_rate": 2.6713373964242043e-05, + "loss": 0.6717, + "step": 280 + }, + { + "epoch": 0.7797537619699042, + "grad_norm": 1.349497675895691, + "learning_rate": 2.6570935321934417e-05, + "loss": 0.6096, + "step": 285 + }, + { + "epoch": 0.7934336525307798, + "grad_norm": 1.0515960454940796, + "learning_rate": 2.642587227137564e-05, + "loss": 0.6014, + "step": 290 + }, + { + "epoch": 0.8071135430916553, + "grad_norm": 1.1479864120483398, + "learning_rate": 2.6278217714371496e-05, + "loss": 0.5854, + "step": 295 + }, + { + "epoch": 0.8207934336525308, + "grad_norm": 1.173852801322937, + "learning_rate": 2.612800514050836e-05, + "loss": 0.5491, + "step": 300 + }, + { + "epoch": 0.8344733242134063, + "grad_norm": 1.147654414176941, + "learning_rate": 2.597526861955736e-05, + "loss": 0.6065, + "step": 305 + }, + { + "epoch": 0.8481532147742818, + "grad_norm": 1.1956430673599243, + "learning_rate": 2.582004279374704e-05, + "loss": 0.6162, + "step": 310 + }, + { + "epoch": 0.8618331053351573, + "grad_norm": 1.1807832717895508, + "learning_rate": 2.5662362869906123e-05, + "loss": 0.5552, + "step": 315 + }, + { + "epoch": 0.8755129958960328, + "grad_norm": 1.2477562427520752, + "learning_rate": 2.5502264611478238e-05, + "loss": 0.5673, + "step": 320 + }, + { + "epoch": 0.8891928864569083, + "grad_norm": 1.2181626558303833, + "learning_rate": 2.5339784330410413e-05, + "loss": 0.4845, + "step": 325 + }, + { + "epoch": 0.9028727770177839, + "grad_norm": 1.2216384410858154, + "learning_rate": 2.5174958878917135e-05, + "loss": 0.4812, + "step": 330 + }, + { + "epoch": 0.9165526675786594, + "grad_norm": 1.152969241142273, + "learning_rate": 2.500782564112188e-05, + "loss": 0.5092, + "step": 335 + }, + { + "epoch": 0.9302325581395349, + "grad_norm": 1.299973726272583, + "learning_rate": 2.4838422524578027e-05, + "loss": 0.4972, + "step": 340 + }, + { + "epoch": 0.9439124487004104, + "grad_norm": 1.290931224822998, + "learning_rate": 2.4666787951671013e-05, + "loss": 0.5244, + "step": 345 + }, + { + "epoch": 0.957592339261286, + "grad_norm": 1.1887459754943848, + "learning_rate": 2.4492960850903757e-05, + "loss": 0.5031, + "step": 350 + }, + { + "epoch": 0.9712722298221614, + "grad_norm": 1.3441587686538696, + "learning_rate": 2.4316980648067225e-05, + "loss": 0.5005, + "step": 355 + }, + { + "epoch": 0.9849521203830369, + "grad_norm": 1.1988083124160767, + "learning_rate": 2.4138887257298317e-05, + "loss": 0.4548, + "step": 360 + }, + { + "epoch": 0.9986320109439124, + "grad_norm": 1.1527678966522217, + "learning_rate": 2.3958721072026893e-05, + "loss": 0.4764, + "step": 365 + }, + { + "epoch": 1.0109439124487003, + "grad_norm": 1.2051851749420166, + "learning_rate": 2.3776522955814094e-05, + "loss": 0.4221, + "step": 370 + }, + { + "epoch": 1.024623803009576, + "grad_norm": 1.2405331134796143, + "learning_rate": 2.3592334233084115e-05, + "loss": 0.3464, + "step": 375 + }, + { + "epoch": 1.0383036935704515, + "grad_norm": 1.1887716054916382, + "learning_rate": 2.3406196679751376e-05, + "loss": 0.4309, + "step": 380 + }, + { + "epoch": 1.051983584131327, + "grad_norm": 1.1489320993423462, + "learning_rate": 2.3218152513745306e-05, + "loss": 0.4028, + "step": 385 + }, + { + "epoch": 1.0656634746922025, + "grad_norm": 1.364243745803833, + "learning_rate": 2.3028244385434863e-05, + "loss": 0.3568, + "step": 390 + }, + { + "epoch": 1.079343365253078, + "grad_norm": 1.2403271198272705, + "learning_rate": 2.283651536795504e-05, + "loss": 0.4132, + "step": 395 + }, + { + "epoch": 1.0930232558139534, + "grad_norm": 1.0908361673355103, + "learning_rate": 2.2643008947437368e-05, + "loss": 0.3853, + "step": 400 + }, + { + "epoch": 1.106703146374829, + "grad_norm": 1.350903034210205, + "learning_rate": 2.244776901314685e-05, + "loss": 0.3594, + "step": 405 + }, + { + "epoch": 1.1203830369357046, + "grad_norm": 1.0905699729919434, + "learning_rate": 2.22508398475274e-05, + "loss": 0.3389, + "step": 410 + }, + { + "epoch": 1.13406292749658, + "grad_norm": 1.2139724493026733, + "learning_rate": 2.2052266116158157e-05, + "loss": 0.3384, + "step": 415 + }, + { + "epoch": 1.1477428180574556, + "grad_norm": 1.2406617403030396, + "learning_rate": 2.185209285762281e-05, + "loss": 0.3617, + "step": 420 + }, + { + "epoch": 1.161422708618331, + "grad_norm": 1.2640937566757202, + "learning_rate": 2.165036547329444e-05, + "loss": 0.3438, + "step": 425 + }, + { + "epoch": 1.1751025991792066, + "grad_norm": 1.3638293743133545, + "learning_rate": 2.144712971703799e-05, + "loss": 0.4048, + "step": 430 + }, + { + "epoch": 1.188782489740082, + "grad_norm": 1.5192639827728271, + "learning_rate": 2.1242431684832802e-05, + "loss": 0.3921, + "step": 435 + }, + { + "epoch": 1.2024623803009575, + "grad_norm": 1.3431072235107422, + "learning_rate": 2.103631780431759e-05, + "loss": 0.3282, + "step": 440 + }, + { + "epoch": 1.216142270861833, + "grad_norm": 1.0964531898498535, + "learning_rate": 2.0828834824260168e-05, + "loss": 0.372, + "step": 445 + }, + { + "epoch": 1.2298221614227085, + "grad_norm": 1.2516348361968994, + "learning_rate": 2.062002980395433e-05, + "loss": 0.3751, + "step": 450 + }, + { + "epoch": 1.2435020519835842, + "grad_norm": 1.1874600648880005, + "learning_rate": 2.0409950102546334e-05, + "loss": 0.2708, + "step": 455 + }, + { + "epoch": 1.2571819425444597, + "grad_norm": 1.331742286682129, + "learning_rate": 2.0198643368293328e-05, + "loss": 0.3153, + "step": 460 + }, + { + "epoch": 1.2708618331053352, + "grad_norm": 1.2921967506408691, + "learning_rate": 1.998615752775626e-05, + "loss": 0.3126, + "step": 465 + }, + { + "epoch": 1.2845417236662107, + "grad_norm": 1.170741081237793, + "learning_rate": 1.9772540774929624e-05, + "loss": 0.2874, + "step": 470 + }, + { + "epoch": 1.2982216142270862, + "grad_norm": 1.4067572355270386, + "learning_rate": 1.9557841560310556e-05, + "loss": 0.2926, + "step": 475 + }, + { + "epoch": 1.3119015047879616, + "grad_norm": 1.2003134489059448, + "learning_rate": 1.934210857990977e-05, + "loss": 0.2641, + "step": 480 + }, + { + "epoch": 1.3255813953488373, + "grad_norm": 1.3164176940917969, + "learning_rate": 1.912539076420678e-05, + "loss": 0.3498, + "step": 485 + }, + { + "epoch": 1.3392612859097128, + "grad_norm": 1.1168181896209717, + "learning_rate": 1.890773726705198e-05, + "loss": 0.2886, + "step": 490 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 1.2587072849273682, + "learning_rate": 1.8689197454518034e-05, + "loss": 0.3252, + "step": 495 + }, + { + "epoch": 1.3666210670314638, + "grad_norm": 1.3926414251327515, + "learning_rate": 1.846982089370312e-05, + "loss": 0.3156, + "step": 500 + }, + { + "epoch": 1.3803009575923393, + "grad_norm": 1.2107939720153809, + "learning_rate": 1.824965734148863e-05, + "loss": 0.3232, + "step": 505 + }, + { + "epoch": 1.3939808481532148, + "grad_norm": 1.1805170774459839, + "learning_rate": 1.8028756733253758e-05, + "loss": 0.2648, + "step": 510 + }, + { + "epoch": 1.4076607387140903, + "grad_norm": 1.2004907131195068, + "learning_rate": 1.7807169171549677e-05, + "loss": 0.3304, + "step": 515 + }, + { + "epoch": 1.4213406292749657, + "grad_norm": 1.304620623588562, + "learning_rate": 1.7584944914735713e-05, + "loss": 0.2946, + "step": 520 + }, + { + "epoch": 1.4350205198358412, + "grad_norm": 1.2282333374023438, + "learning_rate": 1.7362134365580268e-05, + "loss": 0.2599, + "step": 525 + }, + { + "epoch": 1.4487004103967167, + "grad_norm": 1.2331770658493042, + "learning_rate": 1.7138788059828935e-05, + "loss": 0.26, + "step": 530 + }, + { + "epoch": 1.4623803009575924, + "grad_norm": 1.366209864616394, + "learning_rate": 1.6914956654742454e-05, + "loss": 0.2841, + "step": 535 + }, + { + "epoch": 1.476060191518468, + "grad_norm": 1.263872504234314, + "learning_rate": 1.6690690917607138e-05, + "loss": 0.2931, + "step": 540 + }, + { + "epoch": 1.4897400820793434, + "grad_norm": 1.4606742858886719, + "learning_rate": 1.6466041714220316e-05, + "loss": 0.2546, + "step": 545 + }, + { + "epoch": 1.5034199726402189, + "grad_norm": 1.2493213415145874, + "learning_rate": 1.6241059997353442e-05, + "loss": 0.276, + "step": 550 + }, + { + "epoch": 1.5170998632010944, + "grad_norm": 1.2433422803878784, + "learning_rate": 1.6015796795195485e-05, + "loss": 0.2741, + "step": 555 + }, + { + "epoch": 1.53077975376197, + "grad_norm": 1.3792999982833862, + "learning_rate": 1.5790303199779194e-05, + "loss": 0.2436, + "step": 560 + }, + { + "epoch": 1.5444596443228455, + "grad_norm": 1.268170952796936, + "learning_rate": 1.5564630355392902e-05, + "loss": 0.2601, + "step": 565 + }, + { + "epoch": 1.558139534883721, + "grad_norm": 1.3652843236923218, + "learning_rate": 1.5338829446980464e-05, + "loss": 0.2487, + "step": 570 + }, + { + "epoch": 1.5718194254445965, + "grad_norm": 1.2544760704040527, + "learning_rate": 1.5112951688532002e-05, + "loss": 0.2617, + "step": 575 + }, + { + "epoch": 1.585499316005472, + "grad_norm": 1.417412281036377, + "learning_rate": 1.4887048311468002e-05, + "loss": 0.2632, + "step": 580 + }, + { + "epoch": 1.5991792065663475, + "grad_norm": 1.3506377935409546, + "learning_rate": 1.4661170553019537e-05, + "loss": 0.2995, + "step": 585 + }, + { + "epoch": 1.612859097127223, + "grad_norm": 1.1712875366210938, + "learning_rate": 1.4435369644607104e-05, + "loss": 0.2229, + "step": 590 + }, + { + "epoch": 1.6265389876880985, + "grad_norm": 1.3840889930725098, + "learning_rate": 1.4209696800220807e-05, + "loss": 0.2527, + "step": 595 + }, + { + "epoch": 1.640218878248974, + "grad_norm": 1.2742667198181152, + "learning_rate": 1.3984203204804517e-05, + "loss": 0.2459, + "step": 600 + }, + { + "epoch": 1.6538987688098494, + "grad_norm": 1.0372296571731567, + "learning_rate": 1.3758940002646562e-05, + "loss": 0.2223, + "step": 605 + }, + { + "epoch": 1.667578659370725, + "grad_norm": 1.183053731918335, + "learning_rate": 1.3533958285779687e-05, + "loss": 0.198, + "step": 610 + }, + { + "epoch": 1.6812585499316004, + "grad_norm": 1.110137701034546, + "learning_rate": 1.3309309082392864e-05, + "loss": 0.2159, + "step": 615 + }, + { + "epoch": 1.694938440492476, + "grad_norm": 1.1705718040466309, + "learning_rate": 1.3085043345257553e-05, + "loss": 0.205, + "step": 620 + }, + { + "epoch": 1.7086183310533516, + "grad_norm": 1.165091633796692, + "learning_rate": 1.2861211940171067e-05, + "loss": 0.2077, + "step": 625 + }, + { + "epoch": 1.722298221614227, + "grad_norm": 1.1381934881210327, + "learning_rate": 1.2637865634419735e-05, + "loss": 0.2138, + "step": 630 + }, + { + "epoch": 1.7359781121751026, + "grad_norm": 1.236931324005127, + "learning_rate": 1.2415055085264289e-05, + "loss": 0.2131, + "step": 635 + }, + { + "epoch": 1.7496580027359783, + "grad_norm": 1.2112739086151123, + "learning_rate": 1.2192830828450327e-05, + "loss": 0.2044, + "step": 640 + }, + { + "epoch": 1.7633378932968538, + "grad_norm": 1.195837140083313, + "learning_rate": 1.1971243266746243e-05, + "loss": 0.203, + "step": 645 + }, + { + "epoch": 1.7770177838577292, + "grad_norm": 1.0561825037002563, + "learning_rate": 1.175034265851137e-05, + "loss": 0.2193, + "step": 650 + }, + { + "epoch": 1.7906976744186047, + "grad_norm": 1.3426496982574463, + "learning_rate": 1.1530179106296881e-05, + "loss": 0.2012, + "step": 655 + }, + { + "epoch": 1.8043775649794802, + "grad_norm": 1.1832002401351929, + "learning_rate": 1.131080254548197e-05, + "loss": 0.2072, + "step": 660 + }, + { + "epoch": 1.8180574555403557, + "grad_norm": 1.068729281425476, + "learning_rate": 1.1092262732948017e-05, + "loss": 0.1892, + "step": 665 + }, + { + "epoch": 1.8317373461012312, + "grad_norm": 1.1419812440872192, + "learning_rate": 1.0874609235793222e-05, + "loss": 0.1797, + "step": 670 + }, + { + "epoch": 1.8454172366621067, + "grad_norm": 0.9457129836082458, + "learning_rate": 1.0657891420090236e-05, + "loss": 0.1719, + "step": 675 + }, + { + "epoch": 1.8590971272229821, + "grad_norm": 1.124772548675537, + "learning_rate": 1.0442158439689444e-05, + "loss": 0.1656, + "step": 680 + }, + { + "epoch": 1.8727770177838576, + "grad_norm": 1.271947979927063, + "learning_rate": 1.0227459225070379e-05, + "loss": 0.1706, + "step": 685 + }, + { + "epoch": 1.8864569083447331, + "grad_norm": 1.0878015756607056, + "learning_rate": 1.0013842472243742e-05, + "loss": 0.1556, + "step": 690 + }, + { + "epoch": 1.9001367989056086, + "grad_norm": 1.0198779106140137, + "learning_rate": 9.801356631706676e-06, + "loss": 0.1591, + "step": 695 + }, + { + "epoch": 1.9138166894664843, + "grad_norm": 1.2229840755462646, + "learning_rate": 9.590049897453668e-06, + "loss": 0.164, + "step": 700 + }, + { + "epoch": 1.9274965800273598, + "grad_norm": 1.159407377243042, + "learning_rate": 9.379970196045672e-06, + "loss": 0.1566, + "step": 705 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.165200114250183, + "learning_rate": 9.171165175739832e-06, + "loss": 0.1753, + "step": 710 + }, + { + "epoch": 1.9548563611491108, + "grad_norm": 1.2914658784866333, + "learning_rate": 8.96368219568241e-06, + "loss": 0.1797, + "step": 715 + }, + { + "epoch": 1.9685362517099865, + "grad_norm": 1.0982403755187988, + "learning_rate": 8.7575683151672e-06, + "loss": 0.1646, + "step": 720 + }, + { + "epoch": 1.982216142270862, + "grad_norm": 1.2066375017166138, + "learning_rate": 8.552870282962012e-06, + "loss": 0.1376, + "step": 725 + }, + { + "epoch": 1.9958960328317374, + "grad_norm": 1.1736009120941162, + "learning_rate": 8.349634526705558e-06, + "loss": 0.1523, + "step": 730 + }, + { + "epoch": 2.008207934336525, + "grad_norm": 0.967475414276123, + "learning_rate": 8.147907142377198e-06, + "loss": 0.1525, + "step": 735 + }, + { + "epoch": 2.0218878248974006, + "grad_norm": 0.9452887773513794, + "learning_rate": 7.947733883841847e-06, + "loss": 0.1239, + "step": 740 + }, + { + "epoch": 2.0355677154582765, + "grad_norm": 0.9843199849128723, + "learning_rate": 7.749160152472603e-06, + "loss": 0.1503, + "step": 745 + }, + { + "epoch": 2.049247606019152, + "grad_norm": 1.0030560493469238, + "learning_rate": 7.552230986853153e-06, + "loss": 0.1253, + "step": 750 + }, + { + "epoch": 2.0629274965800275, + "grad_norm": 0.9782796502113342, + "learning_rate": 7.35699105256263e-06, + "loss": 0.1087, + "step": 755 + }, + { + "epoch": 2.076607387140903, + "grad_norm": 1.1625540256500244, + "learning_rate": 7.1634846320449625e-06, + "loss": 0.1253, + "step": 760 + }, + { + "epoch": 2.0902872777017785, + "grad_norm": 0.8619825839996338, + "learning_rate": 6.971755614565131e-06, + "loss": 0.1094, + "step": 765 + }, + { + "epoch": 2.103967168262654, + "grad_norm": 1.0183171033859253, + "learning_rate": 6.781847486254698e-06, + "loss": 0.1219, + "step": 770 + }, + { + "epoch": 2.1176470588235294, + "grad_norm": 0.9013718366622925, + "learning_rate": 6.593803320248625e-06, + "loss": 0.1249, + "step": 775 + }, + { + "epoch": 2.131326949384405, + "grad_norm": 0.9241868257522583, + "learning_rate": 6.407665766915886e-06, + "loss": 0.1062, + "step": 780 + }, + { + "epoch": 2.1450068399452804, + "grad_norm": 0.9759536385536194, + "learning_rate": 6.223477044185909e-06, + "loss": 0.1359, + "step": 785 + }, + { + "epoch": 2.158686730506156, + "grad_norm": 1.6658540964126587, + "learning_rate": 6.04127892797311e-06, + "loss": 0.1167, + "step": 790 + }, + { + "epoch": 2.1723666210670314, + "grad_norm": 1.1628588438034058, + "learning_rate": 5.861112742701678e-06, + "loss": 0.1024, + "step": 795 + }, + { + "epoch": 2.186046511627907, + "grad_norm": 0.9043513536453247, + "learning_rate": 5.683019351932775e-06, + "loss": 0.1255, + "step": 800 + }, + { + "epoch": 2.1997264021887823, + "grad_norm": 0.9675664901733398, + "learning_rate": 5.507039149096251e-06, + "loss": 0.1065, + "step": 805 + }, + { + "epoch": 2.213406292749658, + "grad_norm": 0.8183343410491943, + "learning_rate": 5.333212048328983e-06, + "loss": 0.1061, + "step": 810 + }, + { + "epoch": 2.2270861833105333, + "grad_norm": 0.8836042284965515, + "learning_rate": 5.161577475421978e-06, + "loss": 0.1011, + "step": 815 + }, + { + "epoch": 2.2407660738714092, + "grad_norm": 1.1208627223968506, + "learning_rate": 4.992174358878126e-06, + "loss": 0.1269, + "step": 820 + }, + { + "epoch": 2.2544459644322847, + "grad_norm": 0.9704207181930542, + "learning_rate": 4.82504112108287e-06, + "loss": 0.1027, + "step": 825 + }, + { + "epoch": 2.26812585499316, + "grad_norm": 0.9498217701911926, + "learning_rate": 4.660215669589589e-06, + "loss": 0.0905, + "step": 830 + }, + { + "epoch": 2.2818057455540357, + "grad_norm": 0.998333752155304, + "learning_rate": 4.497735388521762e-06, + "loss": 0.116, + "step": 835 + }, + { + "epoch": 2.295485636114911, + "grad_norm": 1.2976107597351074, + "learning_rate": 4.337637130093879e-06, + "loss": 0.122, + "step": 840 + }, + { + "epoch": 2.3091655266757867, + "grad_norm": 1.030010461807251, + "learning_rate": 4.179957206252962e-06, + "loss": 0.1029, + "step": 845 + }, + { + "epoch": 2.322845417236662, + "grad_norm": 0.9905585050582886, + "learning_rate": 4.0247313804426455e-06, + "loss": 0.1284, + "step": 850 + }, + { + "epoch": 2.3365253077975376, + "grad_norm": 0.7810108065605164, + "learning_rate": 3.871994859491643e-06, + "loss": 0.1099, + "step": 855 + }, + { + "epoch": 2.350205198358413, + "grad_norm": 1.0354982614517212, + "learning_rate": 3.7217822856285087e-06, + "loss": 0.0973, + "step": 860 + }, + { + "epoch": 2.3638850889192886, + "grad_norm": 0.89438796043396, + "learning_rate": 3.574127728624365e-06, + "loss": 0.1089, + "step": 865 + }, + { + "epoch": 2.377564979480164, + "grad_norm": 0.8592391014099121, + "learning_rate": 3.429064678065584e-06, + "loss": 0.1007, + "step": 870 + }, + { + "epoch": 2.3912448700410396, + "grad_norm": 1.0615414381027222, + "learning_rate": 3.28662603575796e-06, + "loss": 0.1042, + "step": 875 + }, + { + "epoch": 2.404924760601915, + "grad_norm": 0.9207342267036438, + "learning_rate": 3.1468441082642396e-06, + "loss": 0.1083, + "step": 880 + }, + { + "epoch": 2.4186046511627906, + "grad_norm": 0.8157722353935242, + "learning_rate": 3.009750599576698e-06, + "loss": 0.097, + "step": 885 + }, + { + "epoch": 2.432284541723666, + "grad_norm": 0.8743755221366882, + "learning_rate": 2.8753766039262872e-06, + "loss": 0.0866, + "step": 890 + }, + { + "epoch": 2.4459644322845415, + "grad_norm": 0.8427540063858032, + "learning_rate": 2.7437525987301887e-06, + "loss": 0.1148, + "step": 895 + }, + { + "epoch": 2.459644322845417, + "grad_norm": 0.8525367379188538, + "learning_rate": 2.614908437679195e-06, + "loss": 0.1056, + "step": 900 + }, + { + "epoch": 2.473324213406293, + "grad_norm": 1.5666687488555908, + "learning_rate": 2.4888733439665895e-06, + "loss": 0.1007, + "step": 905 + }, + { + "epoch": 2.4870041039671684, + "grad_norm": 1.0735810995101929, + "learning_rate": 2.365675903660019e-06, + "loss": 0.0927, + "step": 910 + }, + { + "epoch": 2.500683994528044, + "grad_norm": 1.129577398300171, + "learning_rate": 2.2453440592178837e-06, + "loss": 0.104, + "step": 915 + }, + { + "epoch": 2.5143638850889194, + "grad_norm": 0.7014889121055603, + "learning_rate": 2.1279051031516926e-06, + "loss": 0.0998, + "step": 920 + }, + { + "epoch": 2.528043775649795, + "grad_norm": 0.9595668315887451, + "learning_rate": 2.013385671835831e-06, + "loss": 0.1119, + "step": 925 + }, + { + "epoch": 2.5417236662106704, + "grad_norm": 0.7875573039054871, + "learning_rate": 1.9018117394661816e-06, + "loss": 0.0886, + "step": 930 + }, + { + "epoch": 2.555403556771546, + "grad_norm": 0.6888709664344788, + "learning_rate": 1.7932086121688668e-06, + "loss": 0.0782, + "step": 935 + }, + { + "epoch": 2.5690834473324213, + "grad_norm": 0.9888945817947388, + "learning_rate": 1.6876009222605926e-06, + "loss": 0.0946, + "step": 940 + }, + { + "epoch": 2.582763337893297, + "grad_norm": 0.8661866188049316, + "learning_rate": 1.5850126226617611e-06, + "loss": 0.0982, + "step": 945 + }, + { + "epoch": 2.5964432284541723, + "grad_norm": 0.7992923259735107, + "learning_rate": 1.4854669814637145e-06, + "loss": 0.1028, + "step": 950 + }, + { + "epoch": 2.610123119015048, + "grad_norm": 0.7881180644035339, + "learning_rate": 1.388986576651276e-06, + "loss": 0.1022, + "step": 955 + }, + { + "epoch": 2.6238030095759233, + "grad_norm": 0.9455990791320801, + "learning_rate": 1.2955932909818403e-06, + "loss": 0.0911, + "step": 960 + }, + { + "epoch": 2.6374829001367988, + "grad_norm": 0.8477586507797241, + "learning_rate": 1.2053083070221326e-06, + "loss": 0.1006, + "step": 965 + }, + { + "epoch": 2.6511627906976747, + "grad_norm": 0.7237916588783264, + "learning_rate": 1.1181521023437751e-06, + "loss": 0.0935, + "step": 970 + }, + { + "epoch": 2.66484268125855, + "grad_norm": 0.8507518768310547, + "learning_rate": 1.034144444878784e-06, + "loss": 0.0978, + "step": 975 + }, + { + "epoch": 2.6785225718194257, + "grad_norm": 0.9054015874862671, + "learning_rate": 9.533043884359616e-07, + "loss": 0.0849, + "step": 980 + }, + { + "epoch": 2.692202462380301, + "grad_norm": 0.6913324594497681, + "learning_rate": 8.756502683793366e-07, + "loss": 0.0908, + "step": 985 + }, + { + "epoch": 2.7058823529411766, + "grad_norm": 0.6964243054389954, + "learning_rate": 8.011996974694708e-07, + "loss": 0.0893, + "step": 990 + }, + { + "epoch": 2.719562243502052, + "grad_norm": 0.891846239566803, + "learning_rate": 7.299695618687357e-07, + "loss": 0.0829, + "step": 995 + }, + { + "epoch": 2.7332421340629276, + "grad_norm": 0.7170153260231018, + "learning_rate": 6.619760173113437e-07, + "loss": 0.0856, + "step": 1000 + }, + { + "epoch": 2.746922024623803, + "grad_norm": 0.7144414186477661, + "learning_rate": 5.972344854390482e-07, + "loss": 0.0998, + "step": 1005 + }, + { + "epoch": 2.7606019151846786, + "grad_norm": 0.8118796348571777, + "learning_rate": 5.357596503033773e-07, + "loss": 0.0922, + "step": 1010 + }, + { + "epoch": 2.774281805745554, + "grad_norm": 1.3741660118103027, + "learning_rate": 4.775654550351194e-07, + "loss": 0.1051, + "step": 1015 + }, + { + "epoch": 2.7879616963064295, + "grad_norm": 0.8002065420150757, + "learning_rate": 4.2266509868188584e-07, + "loss": 0.088, + "step": 1020 + }, + { + "epoch": 2.801641586867305, + "grad_norm": 0.7380777597427368, + "learning_rate": 3.7107103321443125e-07, + "loss": 0.0943, + "step": 1025 + }, + { + "epoch": 2.8153214774281805, + "grad_norm": 0.7178666591644287, + "learning_rate": 3.2279496070241053e-07, + "loss": 0.0798, + "step": 1030 + }, + { + "epoch": 2.829001367989056, + "grad_norm": 0.6438109278678894, + "learning_rate": 2.7784783066023553e-07, + "loss": 0.0988, + "step": 1035 + }, + { + "epoch": 2.8426812585499315, + "grad_norm": 1.0044842958450317, + "learning_rate": 2.3623983756359825e-07, + "loss": 0.087, + "step": 1040 + }, + { + "epoch": 2.856361149110807, + "grad_norm": 0.7705051302909851, + "learning_rate": 1.979804185372802e-07, + "loss": 0.0962, + "step": 1045 + }, + { + "epoch": 2.8700410396716824, + "grad_norm": 0.682049572467804, + "learning_rate": 1.6307825121469165e-07, + "loss": 0.067, + "step": 1050 + }, + { + "epoch": 2.883720930232558, + "grad_norm": 0.6359822154045105, + "learning_rate": 1.3154125176970732e-07, + "loss": 0.0961, + "step": 1055 + }, + { + "epoch": 2.8974008207934334, + "grad_norm": 0.7508834004402161, + "learning_rate": 1.0337657312119441e-07, + "loss": 0.0949, + "step": 1060 + }, + { + "epoch": 2.911080711354309, + "grad_norm": 0.6761547923088074, + "learning_rate": 7.859060331065371e-08, + "loss": 0.0837, + "step": 1065 + }, + { + "epoch": 2.924760601915185, + "grad_norm": 0.7349043488502502, + "learning_rate": 5.7188964053345174e-08, + "loss": 0.0738, + "step": 1070 + }, + { + "epoch": 2.9384404924760603, + "grad_norm": 0.6732359528541565, + "learning_rate": 3.9176509463227926e-08, + "loss": 0.0911, + "step": 1075 + }, + { + "epoch": 2.952120383036936, + "grad_norm": 0.7374727129936218, + "learning_rate": 2.4557324951994253e-08, + "loss": 0.0935, + "step": 1080 + }, + { + "epoch": 2.9658002735978113, + "grad_norm": 0.7297447323799133, + "learning_rate": 1.3334726302454136e-08, + "loss": 0.0832, + "step": 1085 + }, + { + "epoch": 2.9794801641586868, + "grad_norm": 0.7114595174789429, + "learning_rate": 5.511258916485185e-09, + "loss": 0.0865, + "step": 1090 + }, + { + "epoch": 2.9931600547195623, + "grad_norm": 0.6272600889205933, + "learning_rate": 1.088697237709435e-09, + "loss": 0.073, + "step": 1095 + }, + { + "epoch": 3.0, + "step": 1098, + "total_flos": 1.3890291186532352e+18, + "train_loss": 0.43522213716975977, + "train_runtime": 592.3344, + "train_samples_per_second": 59.227, + "train_steps_per_second": 1.854 + } + ], + "logging_steps": 5, + "max_steps": 1098, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3890291186532352e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1cdff885d3adad1878c04476c3d4b5a392b1480e --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/3_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70836e808e1dfa664bb0789595374ada38887049a857e3c3a638a09f5db189de +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0354ec7f3b17ed86984a80a04d8b10c7a7db3c34 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 4_128_e3_3e-5 + results: [] +--- + + + +# 4_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..53df69f62125c7f74dcd15dbeb5ce5004cd235e5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "gate_proj", + "o_proj", + "down_proj", + "k_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef00b358b5feb72f897604ea9295a8481b6fb3e5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c74451f1c700e260438bf4c326e58888c058528ad8cc4e1f5dd159416e289767 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89316a420ee47e9868f773df1baf1fa5c95a1155 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2667695529522627e+18, + "train_loss": 0.4250270954708555, + "train_runtime": 542.136, + "train_samples": 10720, + "train_samples_per_second": 59.321, + "train_steps_per_second": 1.854 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89316a420ee47e9868f773df1baf1fa5c95a1155 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2667695529522627e+18, + "train_loss": 0.4250270954708555, + "train_runtime": 542.136, + "train_samples": 10720, + "train_samples_per_second": 59.321, + "train_steps_per_second": 1.854 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2a3121afdc653baa064b371d51bcc071ee96db6e --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1450 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1005, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014925373134328358, + "grad_norm": 0.6268998384475708, + "learning_rate": 2.3529411764705885e-06, + "loss": 1.6734, + "step": 5 + }, + { + "epoch": 0.029850746268656716, + "grad_norm": 0.5933209657669067, + "learning_rate": 5.294117647058824e-06, + "loss": 1.6557, + "step": 10 + }, + { + "epoch": 0.04477611940298507, + "grad_norm": 0.5816971063613892, + "learning_rate": 8.23529411764706e-06, + "loss": 1.6225, + "step": 15 + }, + { + "epoch": 0.05970149253731343, + "grad_norm": 0.5397945046424866, + "learning_rate": 1.1176470588235295e-05, + "loss": 1.622, + "step": 20 + }, + { + "epoch": 0.07462686567164178, + "grad_norm": 0.4571664333343506, + "learning_rate": 1.411764705882353e-05, + "loss": 1.5212, + "step": 25 + }, + { + "epoch": 0.08955223880597014, + "grad_norm": 0.5729788541793823, + "learning_rate": 1.7058823529411763e-05, + "loss": 1.5276, + "step": 30 + }, + { + "epoch": 0.1044776119402985, + "grad_norm": 0.4619938135147095, + "learning_rate": 1.9999999999999998e-05, + "loss": 1.5577, + "step": 35 + }, + { + "epoch": 0.11940298507462686, + "grad_norm": 0.45826056599617004, + "learning_rate": 2.2941176470588233e-05, + "loss": 1.5446, + "step": 40 + }, + { + "epoch": 0.13432835820895522, + "grad_norm": 0.4920828640460968, + "learning_rate": 2.5882352941176472e-05, + "loss": 1.4282, + "step": 45 + }, + { + "epoch": 0.14925373134328357, + "grad_norm": 0.4871051013469696, + "learning_rate": 2.8823529411764707e-05, + "loss": 1.4083, + "step": 50 + }, + { + "epoch": 0.16417910447761194, + "grad_norm": 0.5118778944015503, + "learning_rate": 2.9999268013221688e-05, + "loss": 1.3535, + "step": 55 + }, + { + "epoch": 0.1791044776119403, + "grad_norm": 0.520685076713562, + "learning_rate": 2.9994795019399927e-05, + "loss": 1.3604, + "step": 60 + }, + { + "epoch": 0.19402985074626866, + "grad_norm": 0.5544566512107849, + "learning_rate": 2.9986256902246587e-05, + "loss": 1.3267, + "step": 65 + }, + { + "epoch": 0.208955223880597, + "grad_norm": 0.6241164207458496, + "learning_rate": 2.9973655976464824e-05, + "loss": 1.3184, + "step": 70 + }, + { + "epoch": 0.22388059701492538, + "grad_norm": 0.6194041967391968, + "learning_rate": 2.995699565819452e-05, + "loss": 1.2358, + "step": 75 + }, + { + "epoch": 0.23880597014925373, + "grad_norm": 0.623253345489502, + "learning_rate": 2.993628046408618e-05, + "loss": 1.2075, + "step": 80 + }, + { + "epoch": 0.2537313432835821, + "grad_norm": 0.6744920611381531, + "learning_rate": 2.991151601007646e-05, + "loss": 1.223, + "step": 85 + }, + { + "epoch": 0.26865671641791045, + "grad_norm": 0.7486079335212708, + "learning_rate": 2.9882709009865653e-05, + "loss": 1.1576, + "step": 90 + }, + { + "epoch": 0.2835820895522388, + "grad_norm": 0.6923242211341858, + "learning_rate": 2.98498672730976e-05, + "loss": 1.1546, + "step": 95 + }, + { + "epoch": 0.29850746268656714, + "grad_norm": 0.6951536536216736, + "learning_rate": 2.9812999703242502e-05, + "loss": 1.1705, + "step": 100 + }, + { + "epoch": 0.31343283582089554, + "grad_norm": 0.805213212966919, + "learning_rate": 2.977211629518312e-05, + "loss": 1.0864, + "step": 105 + }, + { + "epoch": 0.3283582089552239, + "grad_norm": 0.8616625666618347, + "learning_rate": 2.9727228132505178e-05, + "loss": 1.1553, + "step": 110 + }, + { + "epoch": 0.34328358208955223, + "grad_norm": 0.8333641290664673, + "learning_rate": 2.967834738449256e-05, + "loss": 1.1107, + "step": 115 + }, + { + "epoch": 0.3582089552238806, + "grad_norm": 0.728814959526062, + "learning_rate": 2.9625487302828198e-05, + "loss": 1.0676, + "step": 120 + }, + { + "epoch": 0.373134328358209, + "grad_norm": 0.8003271222114563, + "learning_rate": 2.956866221800151e-05, + "loss": 1.0097, + "step": 125 + }, + { + "epoch": 0.3880597014925373, + "grad_norm": 0.9434797167778015, + "learning_rate": 2.9507887535423357e-05, + "loss": 0.9858, + "step": 130 + }, + { + "epoch": 0.40298507462686567, + "grad_norm": 0.8215721249580383, + "learning_rate": 2.944317973124962e-05, + "loss": 0.9332, + "step": 135 + }, + { + "epoch": 0.417910447761194, + "grad_norm": 0.9685243368148804, + "learning_rate": 2.937455634791447e-05, + "loss": 0.9248, + "step": 140 + }, + { + "epoch": 0.43283582089552236, + "grad_norm": 0.9638697504997253, + "learning_rate": 2.9302035989374565e-05, + "loss": 0.9185, + "step": 145 + }, + { + "epoch": 0.44776119402985076, + "grad_norm": 0.8990851640701294, + "learning_rate": 2.9225638316065483e-05, + "loss": 0.8581, + "step": 150 + }, + { + "epoch": 0.4626865671641791, + "grad_norm": 1.0121314525604248, + "learning_rate": 2.9145384039571743e-05, + "loss": 0.9251, + "step": 155 + }, + { + "epoch": 0.47761194029850745, + "grad_norm": 1.0341403484344482, + "learning_rate": 2.9061294917011817e-05, + "loss": 0.8724, + "step": 160 + }, + { + "epoch": 0.4925373134328358, + "grad_norm": 1.0750606060028076, + "learning_rate": 2.897339374513975e-05, + "loss": 0.8524, + "step": 165 + }, + { + "epoch": 0.5074626865671642, + "grad_norm": 0.9458211660385132, + "learning_rate": 2.888170435416491e-05, + "loss": 0.854, + "step": 170 + }, + { + "epoch": 0.5223880597014925, + "grad_norm": 1.1025030612945557, + "learning_rate": 2.878625160129155e-05, + "loss": 0.8086, + "step": 175 + }, + { + "epoch": 0.5373134328358209, + "grad_norm": 1.0337027311325073, + "learning_rate": 2.8687061363979963e-05, + "loss": 0.8475, + "step": 180 + }, + { + "epoch": 0.5522388059701493, + "grad_norm": 1.1231862306594849, + "learning_rate": 2.858416053293105e-05, + "loss": 0.8321, + "step": 185 + }, + { + "epoch": 0.5671641791044776, + "grad_norm": 1.0165115594863892, + "learning_rate": 2.84775770047962e-05, + "loss": 0.7618, + "step": 190 + }, + { + "epoch": 0.582089552238806, + "grad_norm": 1.0221655368804932, + "learning_rate": 2.8367339674614402e-05, + "loss": 0.7391, + "step": 195 + }, + { + "epoch": 0.5970149253731343, + "grad_norm": 0.9674348831176758, + "learning_rate": 2.825347842797879e-05, + "loss": 0.7241, + "step": 200 + }, + { + "epoch": 0.6119402985074627, + "grad_norm": 1.1583142280578613, + "learning_rate": 2.8136024132934552e-05, + "loss": 0.7268, + "step": 205 + }, + { + "epoch": 0.6268656716417911, + "grad_norm": 1.0742383003234863, + "learning_rate": 2.8015008631610545e-05, + "loss": 0.7195, + "step": 210 + }, + { + "epoch": 0.6417910447761194, + "grad_norm": 1.191591501235962, + "learning_rate": 2.789046473158682e-05, + "loss": 0.7114, + "step": 215 + }, + { + "epoch": 0.6567164179104478, + "grad_norm": 1.0256109237670898, + "learning_rate": 2.7762426197000404e-05, + "loss": 0.6894, + "step": 220 + }, + { + "epoch": 0.6716417910447762, + "grad_norm": 1.0841492414474487, + "learning_rate": 2.763092773939177e-05, + "loss": 0.6723, + "step": 225 + }, + { + "epoch": 0.6865671641791045, + "grad_norm": 1.0821783542633057, + "learning_rate": 2.749600500829448e-05, + "loss": 0.7191, + "step": 230 + }, + { + "epoch": 0.7014925373134329, + "grad_norm": 1.1864893436431885, + "learning_rate": 2.7357694581570475e-05, + "loss": 0.6829, + "step": 235 + }, + { + "epoch": 0.7164179104477612, + "grad_norm": 1.196563720703125, + "learning_rate": 2.7216033955493756e-05, + "loss": 0.6168, + "step": 240 + }, + { + "epoch": 0.7313432835820896, + "grad_norm": 1.2161757946014404, + "learning_rate": 2.7071061534585064e-05, + "loss": 0.6126, + "step": 245 + }, + { + "epoch": 0.746268656716418, + "grad_norm": 1.1841959953308105, + "learning_rate": 2.6922816621200302e-05, + "loss": 0.5584, + "step": 250 + }, + { + "epoch": 0.7611940298507462, + "grad_norm": 1.2079623937606812, + "learning_rate": 2.6771339404875602e-05, + "loss": 0.619, + "step": 255 + }, + { + "epoch": 0.7761194029850746, + "grad_norm": 1.2497903108596802, + "learning_rate": 2.6616670951431842e-05, + "loss": 0.5902, + "step": 260 + }, + { + "epoch": 0.7910447761194029, + "grad_norm": 1.3056892156600952, + "learning_rate": 2.645885319184159e-05, + "loss": 0.5906, + "step": 265 + }, + { + "epoch": 0.8059701492537313, + "grad_norm": 1.1727190017700195, + "learning_rate": 2.6297928910861546e-05, + "loss": 0.5845, + "step": 270 + }, + { + "epoch": 0.8208955223880597, + "grad_norm": 1.2860804796218872, + "learning_rate": 2.6133941735433496e-05, + "loss": 0.5522, + "step": 275 + }, + { + "epoch": 0.835820895522388, + "grad_norm": 1.4585965871810913, + "learning_rate": 2.596693612285691e-05, + "loss": 0.5122, + "step": 280 + }, + { + "epoch": 0.8507462686567164, + "grad_norm": 1.2300658226013184, + "learning_rate": 2.5796957348736522e-05, + "loss": 0.5329, + "step": 285 + }, + { + "epoch": 0.8656716417910447, + "grad_norm": 1.1527055501937866, + "learning_rate": 2.5624051494707967e-05, + "loss": 0.5268, + "step": 290 + }, + { + "epoch": 0.8805970149253731, + "grad_norm": 1.401760220527649, + "learning_rate": 2.5448265435944957e-05, + "loss": 0.4384, + "step": 295 + }, + { + "epoch": 0.8955223880597015, + "grad_norm": 1.2067824602127075, + "learning_rate": 2.5269646828451323e-05, + "loss": 0.531, + "step": 300 + }, + { + "epoch": 0.9104477611940298, + "grad_norm": 1.1669389009475708, + "learning_rate": 2.5088244096141355e-05, + "loss": 0.5083, + "step": 305 + }, + { + "epoch": 0.9253731343283582, + "grad_norm": 1.2549790143966675, + "learning_rate": 2.490410641771196e-05, + "loss": 0.4948, + "step": 310 + }, + { + "epoch": 0.9402985074626866, + "grad_norm": 1.1124684810638428, + "learning_rate": 2.4717283713310224e-05, + "loss": 0.4682, + "step": 315 + }, + { + "epoch": 0.9552238805970149, + "grad_norm": 1.2738817930221558, + "learning_rate": 2.4527826630999922e-05, + "loss": 0.4862, + "step": 320 + }, + { + "epoch": 0.9701492537313433, + "grad_norm": 1.4591965675354004, + "learning_rate": 2.4335786533030736e-05, + "loss": 0.4589, + "step": 325 + }, + { + "epoch": 0.9850746268656716, + "grad_norm": 1.3909885883331299, + "learning_rate": 2.414121548191381e-05, + "loss": 0.4894, + "step": 330 + }, + { + "epoch": 1.0, + "grad_norm": 1.1816288232803345, + "learning_rate": 2.39441662263075e-05, + "loss": 0.4424, + "step": 335 + }, + { + "epoch": 1.0149253731343284, + "grad_norm": 1.2885860204696655, + "learning_rate": 2.374469218671708e-05, + "loss": 0.3895, + "step": 340 + }, + { + "epoch": 1.0298507462686568, + "grad_norm": 1.2231491804122925, + "learning_rate": 2.3542847441012325e-05, + "loss": 0.383, + "step": 345 + }, + { + "epoch": 1.044776119402985, + "grad_norm": 1.250614047050476, + "learning_rate": 2.333868670976688e-05, + "loss": 0.3837, + "step": 350 + }, + { + "epoch": 1.0597014925373134, + "grad_norm": 1.268602967262268, + "learning_rate": 2.3132265341423382e-05, + "loss": 0.3405, + "step": 355 + }, + { + "epoch": 1.0746268656716418, + "grad_norm": 1.2368922233581543, + "learning_rate": 2.292363929728836e-05, + "loss": 0.3814, + "step": 360 + }, + { + "epoch": 1.0895522388059702, + "grad_norm": 1.2279614210128784, + "learning_rate": 2.2712865136361037e-05, + "loss": 0.4063, + "step": 365 + }, + { + "epoch": 1.1044776119402986, + "grad_norm": 1.3168065547943115, + "learning_rate": 2.25e-05, + "loss": 0.3364, + "step": 370 + }, + { + "epoch": 1.1194029850746268, + "grad_norm": 1.218842625617981, + "learning_rate": 2.2285101596432084e-05, + "loss": 0.3685, + "step": 375 + }, + { + "epoch": 1.1343283582089552, + "grad_norm": 1.2473053932189941, + "learning_rate": 2.2068228185107524e-05, + "loss": 0.339, + "step": 380 + }, + { + "epoch": 1.1492537313432836, + "grad_norm": 1.1733065843582153, + "learning_rate": 2.1849438560905697e-05, + "loss": 0.3276, + "step": 385 + }, + { + "epoch": 1.164179104477612, + "grad_norm": 1.4096124172210693, + "learning_rate": 2.162879203819568e-05, + "loss": 0.3345, + "step": 390 + }, + { + "epoch": 1.1791044776119404, + "grad_norm": 1.2889751195907593, + "learning_rate": 2.1406348434755994e-05, + "loss": 0.35, + "step": 395 + }, + { + "epoch": 1.1940298507462686, + "grad_norm": 1.456931233406067, + "learning_rate": 2.118216805555788e-05, + "loss": 0.3376, + "step": 400 + }, + { + "epoch": 1.208955223880597, + "grad_norm": 1.230393648147583, + "learning_rate": 2.0956311676416464e-05, + "loss": 0.3515, + "step": 405 + }, + { + "epoch": 1.2238805970149254, + "grad_norm": 1.2560843229293823, + "learning_rate": 2.0728840527514294e-05, + "loss": 0.2949, + "step": 410 + }, + { + "epoch": 1.2388059701492538, + "grad_norm": 1.1252100467681885, + "learning_rate": 2.0499816276801724e-05, + "loss": 0.3096, + "step": 415 + }, + { + "epoch": 1.2537313432835822, + "grad_norm": 1.2263208627700806, + "learning_rate": 2.0269301013278555e-05, + "loss": 0.2973, + "step": 420 + }, + { + "epoch": 1.2686567164179103, + "grad_norm": 1.276763916015625, + "learning_rate": 2.0037357230161587e-05, + "loss": 0.3046, + "step": 425 + }, + { + "epoch": 1.2835820895522387, + "grad_norm": 1.250522255897522, + "learning_rate": 1.9804047807942564e-05, + "loss": 0.3059, + "step": 430 + }, + { + "epoch": 1.2985074626865671, + "grad_norm": 1.107211947441101, + "learning_rate": 1.956943599734112e-05, + "loss": 0.2718, + "step": 435 + }, + { + "epoch": 1.3134328358208955, + "grad_norm": 1.4417357444763184, + "learning_rate": 1.9333585402157365e-05, + "loss": 0.3245, + "step": 440 + }, + { + "epoch": 1.328358208955224, + "grad_norm": 1.255744457244873, + "learning_rate": 1.9096559962028746e-05, + "loss": 0.2511, + "step": 445 + }, + { + "epoch": 1.3432835820895521, + "grad_norm": 1.188854455947876, + "learning_rate": 1.88584239350959e-05, + "loss": 0.2632, + "step": 450 + }, + { + "epoch": 1.3582089552238805, + "grad_norm": 1.3037139177322388, + "learning_rate": 1.861924188058205e-05, + "loss": 0.3114, + "step": 455 + }, + { + "epoch": 1.373134328358209, + "grad_norm": 1.3202476501464844, + "learning_rate": 1.8379078641290923e-05, + "loss": 0.2675, + "step": 460 + }, + { + "epoch": 1.3880597014925373, + "grad_norm": 1.2824947834014893, + "learning_rate": 1.8137999326027696e-05, + "loss": 0.3042, + "step": 465 + }, + { + "epoch": 1.4029850746268657, + "grad_norm": 1.3042395114898682, + "learning_rate": 1.7896069291947827e-05, + "loss": 0.2619, + "step": 470 + }, + { + "epoch": 1.417910447761194, + "grad_norm": 1.1641383171081543, + "learning_rate": 1.7653354126838593e-05, + "loss": 0.2411, + "step": 475 + }, + { + "epoch": 1.4328358208955223, + "grad_norm": 1.481014609336853, + "learning_rate": 1.7409919631338124e-05, + "loss": 0.315, + "step": 480 + }, + { + "epoch": 1.4477611940298507, + "grad_norm": 1.2284170389175415, + "learning_rate": 1.7165831801096635e-05, + "loss": 0.2409, + "step": 485 + }, + { + "epoch": 1.462686567164179, + "grad_norm": 1.5635013580322266, + "learning_rate": 1.6921156808884904e-05, + "loss": 0.2898, + "step": 490 + }, + { + "epoch": 1.4776119402985075, + "grad_norm": 1.2590882778167725, + "learning_rate": 1.6675960986654675e-05, + "loss": 0.2653, + "step": 495 + }, + { + "epoch": 1.4925373134328357, + "grad_norm": 1.3019382953643799, + "learning_rate": 1.6430310807555884e-05, + "loss": 0.2121, + "step": 500 + }, + { + "epoch": 1.5074626865671643, + "grad_norm": 1.324436902999878, + "learning_rate": 1.618427286791568e-05, + "loss": 0.231, + "step": 505 + }, + { + "epoch": 1.5223880597014925, + "grad_norm": 1.2982065677642822, + "learning_rate": 1.593791386918396e-05, + "loss": 0.2314, + "step": 510 + }, + { + "epoch": 1.537313432835821, + "grad_norm": 1.2652369737625122, + "learning_rate": 1.5691300599850495e-05, + "loss": 0.2214, + "step": 515 + }, + { + "epoch": 1.5522388059701493, + "grad_norm": 1.2871897220611572, + "learning_rate": 1.5444499917338398e-05, + "loss": 0.248, + "step": 520 + }, + { + "epoch": 1.5671641791044775, + "grad_norm": 1.1088814735412598, + "learning_rate": 1.5197578729878915e-05, + "loss": 0.2275, + "step": 525 + }, + { + "epoch": 1.582089552238806, + "grad_norm": 1.2145695686340332, + "learning_rate": 1.4950603978372467e-05, + "loss": 0.2363, + "step": 530 + }, + { + "epoch": 1.5970149253731343, + "grad_norm": 1.1193269491195679, + "learning_rate": 1.4703642618240806e-05, + "loss": 0.2168, + "step": 535 + }, + { + "epoch": 1.6119402985074627, + "grad_norm": 1.0686043500900269, + "learning_rate": 1.4456761601275254e-05, + "loss": 0.2341, + "step": 540 + }, + { + "epoch": 1.626865671641791, + "grad_norm": 1.2121838331222534, + "learning_rate": 1.4210027857485932e-05, + "loss": 0.1899, + "step": 545 + }, + { + "epoch": 1.6417910447761193, + "grad_norm": 1.2336113452911377, + "learning_rate": 1.3963508276956832e-05, + "loss": 0.2296, + "step": 550 + }, + { + "epoch": 1.6567164179104479, + "grad_norm": 1.2309176921844482, + "learning_rate": 1.371726969171182e-05, + "loss": 0.1952, + "step": 555 + }, + { + "epoch": 1.671641791044776, + "grad_norm": 1.1745057106018066, + "learning_rate": 1.34713788575963e-05, + "loss": 0.1911, + "step": 560 + }, + { + "epoch": 1.6865671641791045, + "grad_norm": 1.397426962852478, + "learning_rate": 1.3225902436179515e-05, + "loss": 0.2207, + "step": 565 + }, + { + "epoch": 1.7014925373134329, + "grad_norm": 1.2072315216064453, + "learning_rate": 1.2980906976682508e-05, + "loss": 0.203, + "step": 570 + }, + { + "epoch": 1.716417910447761, + "grad_norm": 1.4832441806793213, + "learning_rate": 1.2736458897936432e-05, + "loss": 0.1923, + "step": 575 + }, + { + "epoch": 1.7313432835820897, + "grad_norm": 1.302507758140564, + "learning_rate": 1.2492624470376253e-05, + "loss": 0.1981, + "step": 580 + }, + { + "epoch": 1.7462686567164178, + "grad_norm": 1.2522516250610352, + "learning_rate": 1.22494697980747e-05, + "loss": 0.2165, + "step": 585 + }, + { + "epoch": 1.7611940298507462, + "grad_norm": 1.2849504947662354, + "learning_rate": 1.20070608008213e-05, + "loss": 0.2051, + "step": 590 + }, + { + "epoch": 1.7761194029850746, + "grad_norm": 1.2042944431304932, + "learning_rate": 1.1765463196251349e-05, + "loss": 0.2059, + "step": 595 + }, + { + "epoch": 1.7910447761194028, + "grad_norm": 1.2073869705200195, + "learning_rate": 1.1524742482029728e-05, + "loss": 0.1753, + "step": 600 + }, + { + "epoch": 1.8059701492537314, + "grad_norm": 1.0507533550262451, + "learning_rate": 1.1284963918094346e-05, + "loss": 0.1943, + "step": 605 + }, + { + "epoch": 1.8208955223880596, + "grad_norm": 1.2254672050476074, + "learning_rate": 1.104619250896399e-05, + "loss": 0.1942, + "step": 610 + }, + { + "epoch": 1.835820895522388, + "grad_norm": 1.2964439392089844, + "learning_rate": 1.0808492986115476e-05, + "loss": 0.1636, + "step": 615 + }, + { + "epoch": 1.8507462686567164, + "grad_norm": 1.3530027866363525, + "learning_rate": 1.0571929790434792e-05, + "loss": 0.1779, + "step": 620 + }, + { + "epoch": 1.8656716417910446, + "grad_norm": 1.184253215789795, + "learning_rate": 1.0336567054747033e-05, + "loss": 0.2006, + "step": 625 + }, + { + "epoch": 1.8805970149253732, + "grad_norm": 1.2191828489303589, + "learning_rate": 1.0102468586429808e-05, + "loss": 0.1808, + "step": 630 + }, + { + "epoch": 1.8955223880597014, + "grad_norm": 1.1134520769119263, + "learning_rate": 9.86969785011497e-06, + "loss": 0.1692, + "step": 635 + }, + { + "epoch": 1.9104477611940298, + "grad_norm": 1.1991833448410034, + "learning_rate": 9.638317950483167e-06, + "loss": 0.1611, + "step": 640 + }, + { + "epoch": 1.9253731343283582, + "grad_norm": 0.9923439621925354, + "learning_rate": 9.408391615156023e-06, + "loss": 0.1447, + "step": 645 + }, + { + "epoch": 1.9402985074626866, + "grad_norm": 1.508631706237793, + "learning_rate": 9.179981177690566e-06, + "loss": 0.1536, + "step": 650 + }, + { + "epoch": 1.955223880597015, + "grad_norm": 1.212967038154602, + "learning_rate": 8.953148560680419e-06, + "loss": 0.1742, + "step": 655 + }, + { + "epoch": 1.9701492537313432, + "grad_norm": 1.3645206689834595, + "learning_rate": 8.727955258968462e-06, + "loss": 0.1592, + "step": 660 + }, + { + "epoch": 1.9850746268656716, + "grad_norm": 1.268890380859375, + "learning_rate": 8.504462322975442e-06, + "loss": 0.1528, + "step": 665 + }, + { + "epoch": 2.0, + "grad_norm": 1.109313726425171, + "learning_rate": 8.282730342149059e-06, + "loss": 0.1458, + "step": 670 + }, + { + "epoch": 2.014925373134328, + "grad_norm": 1.0683165788650513, + "learning_rate": 8.062819428538009e-06, + "loss": 0.117, + "step": 675 + }, + { + "epoch": 2.029850746268657, + "grad_norm": 0.9610497951507568, + "learning_rate": 7.844789200495517e-06, + "loss": 0.1162, + "step": 680 + }, + { + "epoch": 2.044776119402985, + "grad_norm": 1.0808664560317993, + "learning_rate": 7.628698766516625e-06, + "loss": 0.1142, + "step": 685 + }, + { + "epoch": 2.0597014925373136, + "grad_norm": 0.8719048500061035, + "learning_rate": 7.414606709213735e-06, + "loss": 0.1105, + "step": 690 + }, + { + "epoch": 2.074626865671642, + "grad_norm": 0.9152578115463257, + "learning_rate": 7.202571069434772e-06, + "loss": 0.1154, + "step": 695 + }, + { + "epoch": 2.08955223880597, + "grad_norm": 1.1983245611190796, + "learning_rate": 6.992649330528146e-06, + "loss": 0.1191, + "step": 700 + }, + { + "epoch": 2.1044776119402986, + "grad_norm": 1.0236181020736694, + "learning_rate": 6.78489840275887e-06, + "loss": 0.1155, + "step": 705 + }, + { + "epoch": 2.1194029850746268, + "grad_norm": 1.0478994846343994, + "learning_rate": 6.579374607880116e-06, + "loss": 0.1099, + "step": 710 + }, + { + "epoch": 2.1343283582089554, + "grad_norm": 1.0059715509414673, + "learning_rate": 6.376133663864196e-06, + "loss": 0.1072, + "step": 715 + }, + { + "epoch": 2.1492537313432836, + "grad_norm": 1.1679953336715698, + "learning_rate": 6.175230669797306e-06, + "loss": 0.1231, + "step": 720 + }, + { + "epoch": 2.1641791044776117, + "grad_norm": 1.2496074438095093, + "learning_rate": 5.976720090942066e-06, + "loss": 0.1185, + "step": 725 + }, + { + "epoch": 2.1791044776119404, + "grad_norm": 0.9649572968482971, + "learning_rate": 5.780655743971844e-06, + "loss": 0.1017, + "step": 730 + }, + { + "epoch": 2.1940298507462686, + "grad_norm": 1.0837630033493042, + "learning_rate": 5.587090782380912e-06, + "loss": 0.1072, + "step": 735 + }, + { + "epoch": 2.208955223880597, + "grad_norm": 0.9138120412826538, + "learning_rate": 5.3960776820744415e-06, + "loss": 0.1204, + "step": 740 + }, + { + "epoch": 2.2238805970149254, + "grad_norm": 0.9386446475982666, + "learning_rate": 5.207668227142178e-06, + "loss": 0.0992, + "step": 745 + }, + { + "epoch": 2.2388059701492535, + "grad_norm": 0.9485884308815002, + "learning_rate": 5.021913495819593e-06, + "loss": 0.1112, + "step": 750 + }, + { + "epoch": 2.253731343283582, + "grad_norm": 0.7710543870925903, + "learning_rate": 4.838863846640524e-06, + "loss": 0.0896, + "step": 755 + }, + { + "epoch": 2.2686567164179103, + "grad_norm": 0.8565321564674377, + "learning_rate": 4.6585689047848264e-06, + "loss": 0.1101, + "step": 760 + }, + { + "epoch": 2.283582089552239, + "grad_norm": 0.9592469334602356, + "learning_rate": 4.481077548624871e-06, + "loss": 0.1052, + "step": 765 + }, + { + "epoch": 2.298507462686567, + "grad_norm": 0.8947846293449402, + "learning_rate": 4.306437896474523e-06, + "loss": 0.1161, + "step": 770 + }, + { + "epoch": 2.3134328358208958, + "grad_norm": 0.7777400016784668, + "learning_rate": 4.134697293544158e-06, + "loss": 0.1123, + "step": 775 + }, + { + "epoch": 2.328358208955224, + "grad_norm": 0.9739806056022644, + "learning_rate": 3.965902299105245e-06, + "loss": 0.1052, + "step": 780 + }, + { + "epoch": 2.343283582089552, + "grad_norm": 1.0352954864501953, + "learning_rate": 3.8000986738680245e-06, + "loss": 0.1062, + "step": 785 + }, + { + "epoch": 2.3582089552238807, + "grad_norm": 0.9426825046539307, + "learning_rate": 3.637331367575698e-06, + "loss": 0.1012, + "step": 790 + }, + { + "epoch": 2.373134328358209, + "grad_norm": 1.0315134525299072, + "learning_rate": 3.4776445068184365e-06, + "loss": 0.1012, + "step": 795 + }, + { + "epoch": 2.388059701492537, + "grad_norm": 0.7325502634048462, + "learning_rate": 3.32108138307054e-06, + "loss": 0.0931, + "step": 800 + }, + { + "epoch": 2.4029850746268657, + "grad_norm": 0.6805497407913208, + "learning_rate": 3.1676844409540607e-06, + "loss": 0.0914, + "step": 805 + }, + { + "epoch": 2.417910447761194, + "grad_norm": 0.8266638517379761, + "learning_rate": 3.017495266731942e-06, + "loss": 0.0938, + "step": 810 + }, + { + "epoch": 2.4328358208955225, + "grad_norm": 0.9226012825965881, + "learning_rate": 2.8705545770338758e-06, + "loss": 0.1044, + "step": 815 + }, + { + "epoch": 2.4477611940298507, + "grad_norm": 0.8351355195045471, + "learning_rate": 2.7269022078179638e-06, + "loss": 0.0976, + "step": 820 + }, + { + "epoch": 2.4626865671641793, + "grad_norm": 0.9281870126724243, + "learning_rate": 2.5865771035710777e-06, + "loss": 0.0905, + "step": 825 + }, + { + "epoch": 2.4776119402985075, + "grad_norm": 0.7768364548683167, + "learning_rate": 2.449617306750913e-06, + "loss": 0.0948, + "step": 830 + }, + { + "epoch": 2.4925373134328357, + "grad_norm": 1.0167577266693115, + "learning_rate": 2.3160599474726073e-06, + "loss": 0.0822, + "step": 835 + }, + { + "epoch": 2.5074626865671643, + "grad_norm": 0.8464260101318359, + "learning_rate": 2.1859412334426853e-06, + "loss": 0.0824, + "step": 840 + }, + { + "epoch": 2.5223880597014925, + "grad_norm": 1.1654131412506104, + "learning_rate": 2.0592964401430377e-06, + "loss": 0.1085, + "step": 845 + }, + { + "epoch": 2.5373134328358207, + "grad_norm": 0.8176227807998657, + "learning_rate": 1.936159901267682e-06, + "loss": 0.0864, + "step": 850 + }, + { + "epoch": 2.5522388059701493, + "grad_norm": 0.8321353197097778, + "learning_rate": 1.8165649994148203e-06, + "loss": 0.0977, + "step": 855 + }, + { + "epoch": 2.5671641791044775, + "grad_norm": 0.8530035614967346, + "learning_rate": 1.7005441570367164e-06, + "loss": 0.0951, + "step": 860 + }, + { + "epoch": 2.582089552238806, + "grad_norm": 0.8285754919052124, + "learning_rate": 1.5881288276499211e-06, + "loss": 0.0848, + "step": 865 + }, + { + "epoch": 2.5970149253731343, + "grad_norm": 0.6777550578117371, + "learning_rate": 1.4793494873081504e-06, + "loss": 0.0899, + "step": 870 + }, + { + "epoch": 2.611940298507463, + "grad_norm": 0.8134555220603943, + "learning_rate": 1.374235626340128e-06, + "loss": 0.107, + "step": 875 + }, + { + "epoch": 2.626865671641791, + "grad_norm": 0.7743486762046814, + "learning_rate": 1.2728157413547232e-06, + "loss": 0.0859, + "step": 880 + }, + { + "epoch": 2.6417910447761193, + "grad_norm": 0.728428304195404, + "learning_rate": 1.1751173275154403e-06, + "loss": 0.0875, + "step": 885 + }, + { + "epoch": 2.656716417910448, + "grad_norm": 0.8007799386978149, + "learning_rate": 1.0811668710864098e-06, + "loss": 0.0809, + "step": 890 + }, + { + "epoch": 2.671641791044776, + "grad_norm": 0.8097808957099915, + "learning_rate": 9.909898422519198e-07, + "loss": 0.1015, + "step": 895 + }, + { + "epoch": 2.6865671641791042, + "grad_norm": 0.5826131701469421, + "learning_rate": 9.046106882113753e-07, + "loss": 0.0743, + "step": 900 + }, + { + "epoch": 2.701492537313433, + "grad_norm": 0.9193049669265747, + "learning_rate": 8.220528265516125e-07, + "loss": 0.0858, + "step": 905 + }, + { + "epoch": 2.716417910447761, + "grad_norm": 0.7010374665260315, + "learning_rate": 7.433386388983343e-07, + "loss": 0.0761, + "step": 910 + }, + { + "epoch": 2.7313432835820897, + "grad_norm": 0.7247260212898254, + "learning_rate": 6.684894648484069e-07, + "loss": 0.0798, + "step": 915 + }, + { + "epoch": 2.746268656716418, + "grad_norm": 0.717458963394165, + "learning_rate": 5.975255961846343e-07, + "loss": 0.0996, + "step": 920 + }, + { + "epoch": 2.7611940298507465, + "grad_norm": 0.7238516807556152, + "learning_rate": 5.304662713746205e-07, + "loss": 0.0906, + "step": 925 + }, + { + "epoch": 2.7761194029850746, + "grad_norm": 0.7997190356254578, + "learning_rate": 4.6732967035517326e-07, + "loss": 0.0901, + "step": 930 + }, + { + "epoch": 2.791044776119403, + "grad_norm": 0.7370800971984863, + "learning_rate": 4.081329096036829e-07, + "loss": 0.0895, + "step": 935 + }, + { + "epoch": 2.8059701492537314, + "grad_norm": 0.8431984782218933, + "learning_rate": 3.528920374977979e-07, + "loss": 0.087, + "step": 940 + }, + { + "epoch": 2.8208955223880596, + "grad_norm": 0.703717052936554, + "learning_rate": 3.0162202996468156e-07, + "loss": 0.0799, + "step": 945 + }, + { + "epoch": 2.835820895522388, + "grad_norm": 0.7964219450950623, + "learning_rate": 2.5433678642100664e-07, + "loss": 0.0971, + "step": 950 + }, + { + "epoch": 2.8507462686567164, + "grad_norm": 0.8837234973907471, + "learning_rate": 2.110491260047792e-07, + "loss": 0.0954, + "step": 955 + }, + { + "epoch": 2.8656716417910446, + "grad_norm": 0.8303608298301697, + "learning_rate": 1.7177078410005041e-07, + "loss": 0.0939, + "step": 960 + }, + { + "epoch": 2.8805970149253732, + "grad_norm": 0.7764897346496582, + "learning_rate": 1.3651240915542652e-07, + "loss": 0.097, + "step": 965 + }, + { + "epoch": 2.8955223880597014, + "grad_norm": 0.723455011844635, + "learning_rate": 1.0528355979724624e-07, + "loss": 0.0906, + "step": 970 + }, + { + "epoch": 2.91044776119403, + "grad_norm": 0.7808859348297119, + "learning_rate": 7.809270223821552e-08, + "loss": 0.0762, + "step": 975 + }, + { + "epoch": 2.925373134328358, + "grad_norm": 0.7573654055595398, + "learning_rate": 5.4947207982204985e-08, + "loss": 0.0827, + "step": 980 + }, + { + "epoch": 2.9402985074626864, + "grad_norm": 0.7339059114456177, + "learning_rate": 3.585335182580529e-08, + "loss": 0.1068, + "step": 985 + }, + { + "epoch": 2.955223880597015, + "grad_norm": 0.6201454997062683, + "learning_rate": 2.0816310157227846e-08, + "loss": 0.0862, + "step": 990 + }, + { + "epoch": 2.970149253731343, + "grad_norm": 0.6696372628211975, + "learning_rate": 9.840159552969019e-09, + "loss": 0.0807, + "step": 995 + }, + { + "epoch": 2.9850746268656714, + "grad_norm": 0.7628394961357117, + "learning_rate": 2.9278756726375257e-09, + "loss": 0.0933, + "step": 1000 + }, + { + "epoch": 3.0, + "grad_norm": 0.8191271424293518, + "learning_rate": 8.133245225305785e-11, + "loss": 0.0944, + "step": 1005 + }, + { + "epoch": 3.0, + "step": 1005, + "total_flos": 1.2667695529522627e+18, + "train_loss": 0.4250270954708555, + "train_runtime": 542.136, + "train_samples_per_second": 59.321, + "train_steps_per_second": 1.854 + } + ], + "logging_steps": 5, + "max_steps": 1005, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2667695529522627e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d6ceeafc9e5eaac2831097deb0cfd33ec50c9ec --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/4_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260bf869dd0cd42ef8bb2508a4f245cd09f6cd149871245320b5ee39c8807ee0 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8c1f0a0fcb9ea2e92690e7b7dc634731f4ff2fbf --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 5_128_e3_3e-5 + results: [] +--- + + + +# 5_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3386ec647e9a11becbddcedb18ae22c6bffaf56c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "v_proj", + "gate_proj", + "k_proj", + "down_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f923f43e489bdc75b888be5ca0180d79877a8892 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0b067af00ed0f2f9b4805e4dd641f46345f718eb8feac3e35687ce0567b2b9 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6a72c9c02081fa9b812da5d3b2da30b0359bfc81 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3838964687204516e+18, + "train_loss": 0.43290177936273844, + "train_runtime": 607.2111, + "train_samples": 11683, + "train_samples_per_second": 57.721, + "train_steps_per_second": 1.808 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6a72c9c02081fa9b812da5d3b2da30b0359bfc81 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.3838964687204516e+18, + "train_loss": 0.43290177936273844, + "train_runtime": 607.2111, + "train_samples": 11683, + "train_samples_per_second": 57.721, + "train_steps_per_second": 1.808 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..61f0bb20135bdd27a218c55ea34e550aa3cf426b --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1576 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1098, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.013679890560875513, + "grad_norm": 0.6961843967437744, + "learning_rate": 2.181818181818182e-06, + "loss": 1.6915, + "step": 5 + }, + { + "epoch": 0.027359781121751026, + "grad_norm": 0.6326503753662109, + "learning_rate": 4.90909090909091e-06, + "loss": 1.5789, + "step": 10 + }, + { + "epoch": 0.04103967168262654, + "grad_norm": 0.6139146685600281, + "learning_rate": 7.636363636363636e-06, + "loss": 1.5606, + "step": 15 + }, + { + "epoch": 0.05471956224350205, + "grad_norm": 0.5155859589576721, + "learning_rate": 1.0363636363636364e-05, + "loss": 1.594, + "step": 20 + }, + { + "epoch": 0.06839945280437756, + "grad_norm": 0.4652259647846222, + "learning_rate": 1.309090909090909e-05, + "loss": 1.5246, + "step": 25 + }, + { + "epoch": 0.08207934336525308, + "grad_norm": 0.4880393445491791, + "learning_rate": 1.5818181818181818e-05, + "loss": 1.6006, + "step": 30 + }, + { + "epoch": 0.09575923392612859, + "grad_norm": 0.4693818986415863, + "learning_rate": 1.8545454545454545e-05, + "loss": 1.622, + "step": 35 + }, + { + "epoch": 0.1094391244870041, + "grad_norm": 0.4324559271335602, + "learning_rate": 2.1272727272727273e-05, + "loss": 1.5211, + "step": 40 + }, + { + "epoch": 0.12311901504787962, + "grad_norm": 0.44805678725242615, + "learning_rate": 2.4e-05, + "loss": 1.491, + "step": 45 + }, + { + "epoch": 0.13679890560875513, + "grad_norm": 0.4785243570804596, + "learning_rate": 2.6727272727272728e-05, + "loss": 1.4884, + "step": 50 + }, + { + "epoch": 0.15047879616963064, + "grad_norm": 0.5537132024765015, + "learning_rate": 2.9454545454545456e-05, + "loss": 1.5314, + "step": 55 + }, + { + "epoch": 0.16415868673050615, + "grad_norm": 0.5226398706436157, + "learning_rate": 2.9998911302762293e-05, + "loss": 1.3805, + "step": 60 + }, + { + "epoch": 0.17783857729138167, + "grad_norm": 0.5446161031723022, + "learning_rate": 2.9994488741083514e-05, + "loss": 1.4637, + "step": 65 + }, + { + "epoch": 0.19151846785225718, + "grad_norm": 0.5768933296203613, + "learning_rate": 2.9986665273697548e-05, + "loss": 1.3576, + "step": 70 + }, + { + "epoch": 0.2051983584131327, + "grad_norm": 0.5790114402770996, + "learning_rate": 2.997544267504801e-05, + "loss": 1.3578, + "step": 75 + }, + { + "epoch": 0.2188782489740082, + "grad_norm": 0.6538183689117432, + "learning_rate": 2.9960823490536772e-05, + "loss": 1.3461, + "step": 80 + }, + { + "epoch": 0.23255813953488372, + "grad_norm": 0.6376515030860901, + "learning_rate": 2.9942811035946656e-05, + "loss": 1.2944, + "step": 85 + }, + { + "epoch": 0.24623803009575923, + "grad_norm": 0.6155748963356018, + "learning_rate": 2.9921409396689347e-05, + "loss": 1.269, + "step": 90 + }, + { + "epoch": 0.25991792065663477, + "grad_norm": 0.6697672009468079, + "learning_rate": 2.9896623426878805e-05, + "loss": 1.2288, + "step": 95 + }, + { + "epoch": 0.27359781121751026, + "grad_norm": 0.7174825072288513, + "learning_rate": 2.9868458748230293e-05, + "loss": 1.1803, + "step": 100 + }, + { + "epoch": 0.2872777017783858, + "grad_norm": 0.7077262997627258, + "learning_rate": 2.983692174878531e-05, + "loss": 1.1162, + "step": 105 + }, + { + "epoch": 0.3009575923392613, + "grad_norm": 0.8187729120254517, + "learning_rate": 2.980201958146272e-05, + "loss": 1.1579, + "step": 110 + }, + { + "epoch": 0.3146374829001368, + "grad_norm": 0.7795064449310303, + "learning_rate": 2.9763760162436405e-05, + "loss": 1.1429, + "step": 115 + }, + { + "epoch": 0.3283173734610123, + "grad_norm": 0.8283049464225769, + "learning_rate": 2.9722152169339765e-05, + "loss": 1.1293, + "step": 120 + }, + { + "epoch": 0.34199726402188785, + "grad_norm": 0.9120482802391052, + "learning_rate": 2.967720503929759e-05, + "loss": 1.0394, + "step": 125 + }, + { + "epoch": 0.35567715458276333, + "grad_norm": 0.8278937935829163, + "learning_rate": 2.962892896678557e-05, + "loss": 1.0219, + "step": 130 + }, + { + "epoch": 0.3693570451436389, + "grad_norm": 0.9370738863945007, + "learning_rate": 2.9577334901318115e-05, + "loss": 1.0113, + "step": 135 + }, + { + "epoch": 0.38303693570451436, + "grad_norm": 0.7835413217544556, + "learning_rate": 2.952243454496488e-05, + "loss": 0.9997, + "step": 140 + }, + { + "epoch": 0.3967168262653899, + "grad_norm": 0.8963810205459595, + "learning_rate": 2.9464240349696625e-05, + "loss": 0.9436, + "step": 145 + }, + { + "epoch": 0.4103967168262654, + "grad_norm": 0.8409708142280579, + "learning_rate": 2.9402765514560955e-05, + "loss": 1.0452, + "step": 150 + }, + { + "epoch": 0.4240766073871409, + "grad_norm": 0.9141013026237488, + "learning_rate": 2.9338023982688657e-05, + "loss": 0.8942, + "step": 155 + }, + { + "epoch": 0.4377564979480164, + "grad_norm": 0.9848332405090332, + "learning_rate": 2.9270030438131263e-05, + "loss": 0.9147, + "step": 160 + }, + { + "epoch": 0.45143638850889195, + "grad_norm": 1.0231835842132568, + "learning_rate": 2.9198800302530532e-05, + "loss": 0.9139, + "step": 165 + }, + { + "epoch": 0.46511627906976744, + "grad_norm": 1.0482298135757446, + "learning_rate": 2.912434973162067e-05, + "loss": 0.8106, + "step": 170 + }, + { + "epoch": 0.478796169630643, + "grad_norm": 1.0694830417633057, + "learning_rate": 2.904669561156404e-05, + "loss": 0.8641, + "step": 175 + }, + { + "epoch": 0.49247606019151846, + "grad_norm": 0.9498010873794556, + "learning_rate": 2.8965855555121216e-05, + "loss": 0.8859, + "step": 180 + }, + { + "epoch": 0.506155950752394, + "grad_norm": 1.2511837482452393, + "learning_rate": 2.8881847897656224e-05, + "loss": 0.7833, + "step": 185 + }, + { + "epoch": 0.5198358413132695, + "grad_norm": 1.031023383140564, + "learning_rate": 2.879469169297787e-05, + "loss": 0.7554, + "step": 190 + }, + { + "epoch": 0.533515731874145, + "grad_norm": 1.153853416442871, + "learning_rate": 2.870440670901816e-05, + "loss": 0.8337, + "step": 195 + }, + { + "epoch": 0.5471956224350205, + "grad_norm": 1.1300697326660156, + "learning_rate": 2.8611013423348727e-05, + "loss": 0.7136, + "step": 200 + }, + { + "epoch": 0.560875512995896, + "grad_norm": 1.0052961111068726, + "learning_rate": 2.8514533018536286e-05, + "loss": 0.8643, + "step": 205 + }, + { + "epoch": 0.5745554035567716, + "grad_norm": 1.4509509801864624, + "learning_rate": 2.841498737733824e-05, + "loss": 0.6982, + "step": 210 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.017088770866394, + "learning_rate": 2.8312399077739407e-05, + "loss": 0.7273, + "step": 215 + }, + { + "epoch": 0.6019151846785226, + "grad_norm": 0.9969433546066284, + "learning_rate": 2.8206791387831136e-05, + "loss": 0.7922, + "step": 220 + }, + { + "epoch": 0.615595075239398, + "grad_norm": 1.2430459260940552, + "learning_rate": 2.8098188260533818e-05, + "loss": 0.7226, + "step": 225 + }, + { + "epoch": 0.6292749658002736, + "grad_norm": 1.1920976638793945, + "learning_rate": 2.7986614328164168e-05, + "loss": 0.6968, + "step": 230 + }, + { + "epoch": 0.6429548563611491, + "grad_norm": 1.106497883796692, + "learning_rate": 2.7872094896848307e-05, + "loss": 0.72, + "step": 235 + }, + { + "epoch": 0.6566347469220246, + "grad_norm": 1.0783611536026, + "learning_rate": 2.7754655940782117e-05, + "loss": 0.7143, + "step": 240 + }, + { + "epoch": 0.6703146374829001, + "grad_norm": 1.1197624206542969, + "learning_rate": 2.763432409633998e-05, + "loss": 0.6846, + "step": 245 + }, + { + "epoch": 0.6839945280437757, + "grad_norm": 1.1689375638961792, + "learning_rate": 2.751112665603341e-05, + "loss": 0.6768, + "step": 250 + }, + { + "epoch": 0.6976744186046512, + "grad_norm": 1.102946400642395, + "learning_rate": 2.7385091562320808e-05, + "loss": 0.6456, + "step": 255 + }, + { + "epoch": 0.7113543091655267, + "grad_norm": 1.0549838542938232, + "learning_rate": 2.7256247401269814e-05, + "loss": 0.6579, + "step": 260 + }, + { + "epoch": 0.7250341997264022, + "grad_norm": 1.1273530721664429, + "learning_rate": 2.7124623396073715e-05, + "loss": 0.6493, + "step": 265 + }, + { + "epoch": 0.7387140902872777, + "grad_norm": 1.1917917728424072, + "learning_rate": 2.6990249400423305e-05, + "loss": 0.6062, + "step": 270 + }, + { + "epoch": 0.7523939808481532, + "grad_norm": 1.2869882583618164, + "learning_rate": 2.685315589173576e-05, + "loss": 0.692, + "step": 275 + }, + { + "epoch": 0.7660738714090287, + "grad_norm": 1.1397583484649658, + "learning_rate": 2.6713373964242043e-05, + "loss": 0.618, + "step": 280 + }, + { + "epoch": 0.7797537619699042, + "grad_norm": 1.2768388986587524, + "learning_rate": 2.6570935321934417e-05, + "loss": 0.5809, + "step": 285 + }, + { + "epoch": 0.7934336525307798, + "grad_norm": 1.1684890985488892, + "learning_rate": 2.642587227137564e-05, + "loss": 0.5804, + "step": 290 + }, + { + "epoch": 0.8071135430916553, + "grad_norm": 1.1871200799942017, + "learning_rate": 2.6278217714371496e-05, + "loss": 0.5975, + "step": 295 + }, + { + "epoch": 0.8207934336525308, + "grad_norm": 1.2277326583862305, + "learning_rate": 2.612800514050836e-05, + "loss": 0.6104, + "step": 300 + }, + { + "epoch": 0.8344733242134063, + "grad_norm": 1.2517526149749756, + "learning_rate": 2.597526861955736e-05, + "loss": 0.5519, + "step": 305 + }, + { + "epoch": 0.8481532147742818, + "grad_norm": 1.233791708946228, + "learning_rate": 2.582004279374704e-05, + "loss": 0.5655, + "step": 310 + }, + { + "epoch": 0.8618331053351573, + "grad_norm": 1.171066403388977, + "learning_rate": 2.5662362869906123e-05, + "loss": 0.5317, + "step": 315 + }, + { + "epoch": 0.8755129958960328, + "grad_norm": 1.3368580341339111, + "learning_rate": 2.5502264611478238e-05, + "loss": 0.5343, + "step": 320 + }, + { + "epoch": 0.8891928864569083, + "grad_norm": 1.2217209339141846, + "learning_rate": 2.5339784330410413e-05, + "loss": 0.5423, + "step": 325 + }, + { + "epoch": 0.9028727770177839, + "grad_norm": 1.276624321937561, + "learning_rate": 2.5174958878917135e-05, + "loss": 0.4714, + "step": 330 + }, + { + "epoch": 0.9165526675786594, + "grad_norm": 1.1445791721343994, + "learning_rate": 2.500782564112188e-05, + "loss": 0.5086, + "step": 335 + }, + { + "epoch": 0.9302325581395349, + "grad_norm": 1.199570894241333, + "learning_rate": 2.4838422524578027e-05, + "loss": 0.513, + "step": 340 + }, + { + "epoch": 0.9439124487004104, + "grad_norm": 1.1413410902023315, + "learning_rate": 2.4666787951671013e-05, + "loss": 0.462, + "step": 345 + }, + { + "epoch": 0.957592339261286, + "grad_norm": 1.1571322679519653, + "learning_rate": 2.4492960850903757e-05, + "loss": 0.4851, + "step": 350 + }, + { + "epoch": 0.9712722298221614, + "grad_norm": 1.2341431379318237, + "learning_rate": 2.4316980648067225e-05, + "loss": 0.507, + "step": 355 + }, + { + "epoch": 0.9849521203830369, + "grad_norm": 1.2431319952011108, + "learning_rate": 2.4138887257298317e-05, + "loss": 0.4891, + "step": 360 + }, + { + "epoch": 0.9986320109439124, + "grad_norm": 1.3505032062530518, + "learning_rate": 2.3958721072026893e-05, + "loss": 0.4339, + "step": 365 + }, + { + "epoch": 1.0109439124487003, + "grad_norm": 1.3656429052352905, + "learning_rate": 2.3776522955814094e-05, + "loss": 0.3766, + "step": 370 + }, + { + "epoch": 1.024623803009576, + "grad_norm": 1.3371591567993164, + "learning_rate": 2.3592334233084115e-05, + "loss": 0.4246, + "step": 375 + }, + { + "epoch": 1.0383036935704515, + "grad_norm": 1.1853920221328735, + "learning_rate": 2.3406196679751376e-05, + "loss": 0.4316, + "step": 380 + }, + { + "epoch": 1.051983584131327, + "grad_norm": 1.1238950490951538, + "learning_rate": 2.3218152513745306e-05, + "loss": 0.4015, + "step": 385 + }, + { + "epoch": 1.0656634746922025, + "grad_norm": 1.3657602071762085, + "learning_rate": 2.3028244385434863e-05, + "loss": 0.3865, + "step": 390 + }, + { + "epoch": 1.079343365253078, + "grad_norm": 1.3014473915100098, + "learning_rate": 2.283651536795504e-05, + "loss": 0.3606, + "step": 395 + }, + { + "epoch": 1.0930232558139534, + "grad_norm": 1.224346399307251, + "learning_rate": 2.2643008947437368e-05, + "loss": 0.3719, + "step": 400 + }, + { + "epoch": 1.106703146374829, + "grad_norm": 1.1991558074951172, + "learning_rate": 2.244776901314685e-05, + "loss": 0.3671, + "step": 405 + }, + { + "epoch": 1.1203830369357046, + "grad_norm": 1.3362585306167603, + "learning_rate": 2.22508398475274e-05, + "loss": 0.3674, + "step": 410 + }, + { + "epoch": 1.13406292749658, + "grad_norm": 1.3731608390808105, + "learning_rate": 2.2052266116158157e-05, + "loss": 0.3731, + "step": 415 + }, + { + "epoch": 1.1477428180574556, + "grad_norm": 1.5102213621139526, + "learning_rate": 2.185209285762281e-05, + "loss": 0.3268, + "step": 420 + }, + { + "epoch": 1.161422708618331, + "grad_norm": 1.2183620929718018, + "learning_rate": 2.165036547329444e-05, + "loss": 0.3849, + "step": 425 + }, + { + "epoch": 1.1751025991792066, + "grad_norm": 1.2314201593399048, + "learning_rate": 2.144712971703799e-05, + "loss": 0.3171, + "step": 430 + }, + { + "epoch": 1.188782489740082, + "grad_norm": 1.2154593467712402, + "learning_rate": 2.1242431684832802e-05, + "loss": 0.3586, + "step": 435 + }, + { + "epoch": 1.2024623803009575, + "grad_norm": 1.2991036176681519, + "learning_rate": 2.103631780431759e-05, + "loss": 0.3399, + "step": 440 + }, + { + "epoch": 1.216142270861833, + "grad_norm": 1.2038905620574951, + "learning_rate": 2.0828834824260168e-05, + "loss": 0.3641, + "step": 445 + }, + { + "epoch": 1.2298221614227085, + "grad_norm": 1.2024774551391602, + "learning_rate": 2.062002980395433e-05, + "loss": 0.349, + "step": 450 + }, + { + "epoch": 1.2435020519835842, + "grad_norm": 1.277677297592163, + "learning_rate": 2.0409950102546334e-05, + "loss": 0.3185, + "step": 455 + }, + { + "epoch": 1.2571819425444597, + "grad_norm": 1.2707167863845825, + "learning_rate": 2.0198643368293328e-05, + "loss": 0.3281, + "step": 460 + }, + { + "epoch": 1.2708618331053352, + "grad_norm": 1.2598094940185547, + "learning_rate": 1.998615752775626e-05, + "loss": 0.3099, + "step": 465 + }, + { + "epoch": 1.2845417236662107, + "grad_norm": 1.2862110137939453, + "learning_rate": 1.9772540774929624e-05, + "loss": 0.3114, + "step": 470 + }, + { + "epoch": 1.2982216142270862, + "grad_norm": 1.2780897617340088, + "learning_rate": 1.9557841560310556e-05, + "loss": 0.2962, + "step": 475 + }, + { + "epoch": 1.3119015047879616, + "grad_norm": 1.4337153434753418, + "learning_rate": 1.934210857990977e-05, + "loss": 0.3222, + "step": 480 + }, + { + "epoch": 1.3255813953488373, + "grad_norm": 1.1190190315246582, + "learning_rate": 1.912539076420678e-05, + "loss": 0.2971, + "step": 485 + }, + { + "epoch": 1.3392612859097128, + "grad_norm": 1.3275233507156372, + "learning_rate": 1.890773726705198e-05, + "loss": 0.322, + "step": 490 + }, + { + "epoch": 1.3529411764705883, + "grad_norm": 1.1304950714111328, + "learning_rate": 1.8689197454518034e-05, + "loss": 0.2799, + "step": 495 + }, + { + "epoch": 1.3666210670314638, + "grad_norm": 1.1424109935760498, + "learning_rate": 1.846982089370312e-05, + "loss": 0.2765, + "step": 500 + }, + { + "epoch": 1.3803009575923393, + "grad_norm": 1.1686183214187622, + "learning_rate": 1.824965734148863e-05, + "loss": 0.2819, + "step": 505 + }, + { + "epoch": 1.3939808481532148, + "grad_norm": 1.3379395008087158, + "learning_rate": 1.8028756733253758e-05, + "loss": 0.3128, + "step": 510 + }, + { + "epoch": 1.4076607387140903, + "grad_norm": 1.2741140127182007, + "learning_rate": 1.7807169171549677e-05, + "loss": 0.2522, + "step": 515 + }, + { + "epoch": 1.4213406292749657, + "grad_norm": 1.4755356311798096, + "learning_rate": 1.7584944914735713e-05, + "loss": 0.3038, + "step": 520 + }, + { + "epoch": 1.4350205198358412, + "grad_norm": 1.118135690689087, + "learning_rate": 1.7362134365580268e-05, + "loss": 0.2748, + "step": 525 + }, + { + "epoch": 1.4487004103967167, + "grad_norm": 1.259648323059082, + "learning_rate": 1.7138788059828935e-05, + "loss": 0.2691, + "step": 530 + }, + { + "epoch": 1.4623803009575924, + "grad_norm": 1.2880859375, + "learning_rate": 1.6914956654742454e-05, + "loss": 0.2266, + "step": 535 + }, + { + "epoch": 1.476060191518468, + "grad_norm": 1.1083964109420776, + "learning_rate": 1.6690690917607138e-05, + "loss": 0.2343, + "step": 540 + }, + { + "epoch": 1.4897400820793434, + "grad_norm": 1.6012113094329834, + "learning_rate": 1.6466041714220316e-05, + "loss": 0.2374, + "step": 545 + }, + { + "epoch": 1.5034199726402189, + "grad_norm": 1.3417255878448486, + "learning_rate": 1.6241059997353442e-05, + "loss": 0.2728, + "step": 550 + }, + { + "epoch": 1.5170998632010944, + "grad_norm": 1.2223930358886719, + "learning_rate": 1.6015796795195485e-05, + "loss": 0.2575, + "step": 555 + }, + { + "epoch": 1.53077975376197, + "grad_norm": 1.0708370208740234, + "learning_rate": 1.5790303199779194e-05, + "loss": 0.2449, + "step": 560 + }, + { + "epoch": 1.5444596443228455, + "grad_norm": 1.2155961990356445, + "learning_rate": 1.5564630355392902e-05, + "loss": 0.218, + "step": 565 + }, + { + "epoch": 1.558139534883721, + "grad_norm": 1.2410306930541992, + "learning_rate": 1.5338829446980464e-05, + "loss": 0.2583, + "step": 570 + }, + { + "epoch": 1.5718194254445965, + "grad_norm": 1.4256492853164673, + "learning_rate": 1.5112951688532002e-05, + "loss": 0.2434, + "step": 575 + }, + { + "epoch": 1.585499316005472, + "grad_norm": 1.286110520362854, + "learning_rate": 1.4887048311468002e-05, + "loss": 0.2377, + "step": 580 + }, + { + "epoch": 1.5991792065663475, + "grad_norm": 1.2660114765167236, + "learning_rate": 1.4661170553019537e-05, + "loss": 0.2428, + "step": 585 + }, + { + "epoch": 1.612859097127223, + "grad_norm": 1.3627064228057861, + "learning_rate": 1.4435369644607104e-05, + "loss": 0.2372, + "step": 590 + }, + { + "epoch": 1.6265389876880985, + "grad_norm": 1.1830154657363892, + "learning_rate": 1.4209696800220807e-05, + "loss": 0.2622, + "step": 595 + }, + { + "epoch": 1.640218878248974, + "grad_norm": 1.2680561542510986, + "learning_rate": 1.3984203204804517e-05, + "loss": 0.2348, + "step": 600 + }, + { + "epoch": 1.6538987688098494, + "grad_norm": 1.2810118198394775, + "learning_rate": 1.3758940002646562e-05, + "loss": 0.1826, + "step": 605 + }, + { + "epoch": 1.667578659370725, + "grad_norm": 1.2430423498153687, + "learning_rate": 1.3533958285779687e-05, + "loss": 0.2189, + "step": 610 + }, + { + "epoch": 1.6812585499316004, + "grad_norm": 1.2163525819778442, + "learning_rate": 1.3309309082392864e-05, + "loss": 0.2105, + "step": 615 + }, + { + "epoch": 1.694938440492476, + "grad_norm": 1.390239953994751, + "learning_rate": 1.3085043345257553e-05, + "loss": 0.2281, + "step": 620 + }, + { + "epoch": 1.7086183310533516, + "grad_norm": 1.1899970769882202, + "learning_rate": 1.2861211940171067e-05, + "loss": 0.2148, + "step": 625 + }, + { + "epoch": 1.722298221614227, + "grad_norm": 1.2089955806732178, + "learning_rate": 1.2637865634419735e-05, + "loss": 0.2266, + "step": 630 + }, + { + "epoch": 1.7359781121751026, + "grad_norm": 1.4811275005340576, + "learning_rate": 1.2415055085264289e-05, + "loss": 0.2325, + "step": 635 + }, + { + "epoch": 1.7496580027359783, + "grad_norm": 1.2113186120986938, + "learning_rate": 1.2192830828450327e-05, + "loss": 0.2272, + "step": 640 + }, + { + "epoch": 1.7633378932968538, + "grad_norm": 1.2834901809692383, + "learning_rate": 1.1971243266746243e-05, + "loss": 0.1998, + "step": 645 + }, + { + "epoch": 1.7770177838577292, + "grad_norm": 1.3340630531311035, + "learning_rate": 1.175034265851137e-05, + "loss": 0.2074, + "step": 650 + }, + { + "epoch": 1.7906976744186047, + "grad_norm": 1.1062945127487183, + "learning_rate": 1.1530179106296881e-05, + "loss": 0.2038, + "step": 655 + }, + { + "epoch": 1.8043775649794802, + "grad_norm": 1.1979867219924927, + "learning_rate": 1.131080254548197e-05, + "loss": 0.1759, + "step": 660 + }, + { + "epoch": 1.8180574555403557, + "grad_norm": 1.1723865270614624, + "learning_rate": 1.1092262732948017e-05, + "loss": 0.1879, + "step": 665 + }, + { + "epoch": 1.8317373461012312, + "grad_norm": 1.2494192123413086, + "learning_rate": 1.0874609235793222e-05, + "loss": 0.1842, + "step": 670 + }, + { + "epoch": 1.8454172366621067, + "grad_norm": 1.3119175434112549, + "learning_rate": 1.0657891420090236e-05, + "loss": 0.1671, + "step": 675 + }, + { + "epoch": 1.8590971272229821, + "grad_norm": 1.031211495399475, + "learning_rate": 1.0442158439689444e-05, + "loss": 0.1952, + "step": 680 + }, + { + "epoch": 1.8727770177838576, + "grad_norm": 1.1180797815322876, + "learning_rate": 1.0227459225070379e-05, + "loss": 0.173, + "step": 685 + }, + { + "epoch": 1.8864569083447331, + "grad_norm": 1.1412172317504883, + "learning_rate": 1.0013842472243742e-05, + "loss": 0.1651, + "step": 690 + }, + { + "epoch": 1.9001367989056086, + "grad_norm": 1.0055841207504272, + "learning_rate": 9.801356631706676e-06, + "loss": 0.1743, + "step": 695 + }, + { + "epoch": 1.9138166894664843, + "grad_norm": 1.3123230934143066, + "learning_rate": 9.590049897453668e-06, + "loss": 0.1635, + "step": 700 + }, + { + "epoch": 1.9274965800273598, + "grad_norm": 1.2890970706939697, + "learning_rate": 9.379970196045672e-06, + "loss": 0.1693, + "step": 705 + }, + { + "epoch": 1.9411764705882353, + "grad_norm": 1.140576958656311, + "learning_rate": 9.171165175739832e-06, + "loss": 0.1506, + "step": 710 + }, + { + "epoch": 1.9548563611491108, + "grad_norm": 1.1876869201660156, + "learning_rate": 8.96368219568241e-06, + "loss": 0.1531, + "step": 715 + }, + { + "epoch": 1.9685362517099865, + "grad_norm": 1.0748443603515625, + "learning_rate": 8.7575683151672e-06, + "loss": 0.1758, + "step": 720 + }, + { + "epoch": 1.982216142270862, + "grad_norm": 1.0670384168624878, + "learning_rate": 8.552870282962012e-06, + "loss": 0.1785, + "step": 725 + }, + { + "epoch": 1.9958960328317374, + "grad_norm": 1.0904499292373657, + "learning_rate": 8.349634526705558e-06, + "loss": 0.1679, + "step": 730 + }, + { + "epoch": 2.008207934336525, + "grad_norm": 1.1492786407470703, + "learning_rate": 8.147907142377198e-06, + "loss": 0.1237, + "step": 735 + }, + { + "epoch": 2.0218878248974006, + "grad_norm": 1.3833168745040894, + "learning_rate": 7.947733883841847e-06, + "loss": 0.1346, + "step": 740 + }, + { + "epoch": 2.0355677154582765, + "grad_norm": 1.0279572010040283, + "learning_rate": 7.749160152472603e-06, + "loss": 0.1323, + "step": 745 + }, + { + "epoch": 2.049247606019152, + "grad_norm": 1.1236238479614258, + "learning_rate": 7.552230986853153e-06, + "loss": 0.1321, + "step": 750 + }, + { + "epoch": 2.0629274965800275, + "grad_norm": 1.0342386960983276, + "learning_rate": 7.35699105256263e-06, + "loss": 0.1202, + "step": 755 + }, + { + "epoch": 2.076607387140903, + "grad_norm": 1.0407075881958008, + "learning_rate": 7.1634846320449625e-06, + "loss": 0.1291, + "step": 760 + }, + { + "epoch": 2.0902872777017785, + "grad_norm": 0.8559085726737976, + "learning_rate": 6.971755614565131e-06, + "loss": 0.1118, + "step": 765 + }, + { + "epoch": 2.103967168262654, + "grad_norm": 0.9953400492668152, + "learning_rate": 6.781847486254698e-06, + "loss": 0.1063, + "step": 770 + }, + { + "epoch": 2.1176470588235294, + "grad_norm": 1.0990335941314697, + "learning_rate": 6.593803320248625e-06, + "loss": 0.1128, + "step": 775 + }, + { + "epoch": 2.131326949384405, + "grad_norm": 1.0070558786392212, + "learning_rate": 6.407665766915886e-06, + "loss": 0.1068, + "step": 780 + }, + { + "epoch": 2.1450068399452804, + "grad_norm": 1.2120344638824463, + "learning_rate": 6.223477044185909e-06, + "loss": 0.1158, + "step": 785 + }, + { + "epoch": 2.158686730506156, + "grad_norm": 1.056104063987732, + "learning_rate": 6.04127892797311e-06, + "loss": 0.1325, + "step": 790 + }, + { + "epoch": 2.1723666210670314, + "grad_norm": 0.9447214007377625, + "learning_rate": 5.861112742701678e-06, + "loss": 0.1031, + "step": 795 + }, + { + "epoch": 2.186046511627907, + "grad_norm": 1.0051339864730835, + "learning_rate": 5.683019351932775e-06, + "loss": 0.1212, + "step": 800 + }, + { + "epoch": 2.1997264021887823, + "grad_norm": 1.0212894678115845, + "learning_rate": 5.507039149096251e-06, + "loss": 0.0996, + "step": 805 + }, + { + "epoch": 2.213406292749658, + "grad_norm": 1.0169320106506348, + "learning_rate": 5.333212048328983e-06, + "loss": 0.1124, + "step": 810 + }, + { + "epoch": 2.2270861833105333, + "grad_norm": 0.9749862551689148, + "learning_rate": 5.161577475421978e-06, + "loss": 0.1161, + "step": 815 + }, + { + "epoch": 2.2407660738714092, + "grad_norm": 0.9641462564468384, + "learning_rate": 4.992174358878126e-06, + "loss": 0.0977, + "step": 820 + }, + { + "epoch": 2.2544459644322847, + "grad_norm": 1.0865278244018555, + "learning_rate": 4.82504112108287e-06, + "loss": 0.1147, + "step": 825 + }, + { + "epoch": 2.26812585499316, + "grad_norm": 0.8668743371963501, + "learning_rate": 4.660215669589589e-06, + "loss": 0.1111, + "step": 830 + }, + { + "epoch": 2.2818057455540357, + "grad_norm": 0.8408440351486206, + "learning_rate": 4.497735388521762e-06, + "loss": 0.0947, + "step": 835 + }, + { + "epoch": 2.295485636114911, + "grad_norm": 0.9401116967201233, + "learning_rate": 4.337637130093879e-06, + "loss": 0.0969, + "step": 840 + }, + { + "epoch": 2.3091655266757867, + "grad_norm": 1.075936198234558, + "learning_rate": 4.179957206252962e-06, + "loss": 0.1125, + "step": 845 + }, + { + "epoch": 2.322845417236662, + "grad_norm": 0.8711522221565247, + "learning_rate": 4.0247313804426455e-06, + "loss": 0.0965, + "step": 850 + }, + { + "epoch": 2.3365253077975376, + "grad_norm": 0.8997689485549927, + "learning_rate": 3.871994859491643e-06, + "loss": 0.1102, + "step": 855 + }, + { + "epoch": 2.350205198358413, + "grad_norm": 0.7194733023643494, + "learning_rate": 3.7217822856285087e-06, + "loss": 0.0916, + "step": 860 + }, + { + "epoch": 2.3638850889192886, + "grad_norm": 0.8804566860198975, + "learning_rate": 3.574127728624365e-06, + "loss": 0.1026, + "step": 865 + }, + { + "epoch": 2.377564979480164, + "grad_norm": 1.0235337018966675, + "learning_rate": 3.429064678065584e-06, + "loss": 0.1042, + "step": 870 + }, + { + "epoch": 2.3912448700410396, + "grad_norm": 0.9694947004318237, + "learning_rate": 3.28662603575796e-06, + "loss": 0.0911, + "step": 875 + }, + { + "epoch": 2.404924760601915, + "grad_norm": 0.8703529834747314, + "learning_rate": 3.1468441082642396e-06, + "loss": 0.0972, + "step": 880 + }, + { + "epoch": 2.4186046511627906, + "grad_norm": 0.8119808435440063, + "learning_rate": 3.009750599576698e-06, + "loss": 0.1012, + "step": 885 + }, + { + "epoch": 2.432284541723666, + "grad_norm": 0.8737303018569946, + "learning_rate": 2.8753766039262872e-06, + "loss": 0.1086, + "step": 890 + }, + { + "epoch": 2.4459644322845415, + "grad_norm": 1.0442044734954834, + "learning_rate": 2.7437525987301887e-06, + "loss": 0.1332, + "step": 895 + }, + { + "epoch": 2.459644322845417, + "grad_norm": 0.8215887546539307, + "learning_rate": 2.614908437679195e-06, + "loss": 0.1019, + "step": 900 + }, + { + "epoch": 2.473324213406293, + "grad_norm": 0.7737637162208557, + "learning_rate": 2.4888733439665895e-06, + "loss": 0.1049, + "step": 905 + }, + { + "epoch": 2.4870041039671684, + "grad_norm": 0.9024238586425781, + "learning_rate": 2.365675903660019e-06, + "loss": 0.0918, + "step": 910 + }, + { + "epoch": 2.500683994528044, + "grad_norm": 0.8852851986885071, + "learning_rate": 2.2453440592178837e-06, + "loss": 0.1081, + "step": 915 + }, + { + "epoch": 2.5143638850889194, + "grad_norm": 0.7901269793510437, + "learning_rate": 2.1279051031516926e-06, + "loss": 0.0957, + "step": 920 + }, + { + "epoch": 2.528043775649795, + "grad_norm": 0.8661218285560608, + "learning_rate": 2.013385671835831e-06, + "loss": 0.0917, + "step": 925 + }, + { + "epoch": 2.5417236662106704, + "grad_norm": 0.775661289691925, + "learning_rate": 1.9018117394661816e-06, + "loss": 0.0999, + "step": 930 + }, + { + "epoch": 2.555403556771546, + "grad_norm": 0.8846317529678345, + "learning_rate": 1.7932086121688668e-06, + "loss": 0.0883, + "step": 935 + }, + { + "epoch": 2.5690834473324213, + "grad_norm": 0.7759684324264526, + "learning_rate": 1.6876009222605926e-06, + "loss": 0.0903, + "step": 940 + }, + { + "epoch": 2.582763337893297, + "grad_norm": 0.802230954170227, + "learning_rate": 1.5850126226617611e-06, + "loss": 0.0883, + "step": 945 + }, + { + "epoch": 2.5964432284541723, + "grad_norm": 0.818270742893219, + "learning_rate": 1.4854669814637145e-06, + "loss": 0.0974, + "step": 950 + }, + { + "epoch": 2.610123119015048, + "grad_norm": 0.8325796723365784, + "learning_rate": 1.388986576651276e-06, + "loss": 0.1114, + "step": 955 + }, + { + "epoch": 2.6238030095759233, + "grad_norm": 0.6422017216682434, + "learning_rate": 1.2955932909818403e-06, + "loss": 0.0936, + "step": 960 + }, + { + "epoch": 2.6374829001367988, + "grad_norm": 0.7672849893569946, + "learning_rate": 1.2053083070221326e-06, + "loss": 0.0869, + "step": 965 + }, + { + "epoch": 2.6511627906976747, + "grad_norm": 1.0981749296188354, + "learning_rate": 1.1181521023437751e-06, + "loss": 0.1031, + "step": 970 + }, + { + "epoch": 2.66484268125855, + "grad_norm": 0.8617575764656067, + "learning_rate": 1.034144444878784e-06, + "loss": 0.0934, + "step": 975 + }, + { + "epoch": 2.6785225718194257, + "grad_norm": 0.6794080138206482, + "learning_rate": 9.533043884359616e-07, + "loss": 0.0836, + "step": 980 + }, + { + "epoch": 2.692202462380301, + "grad_norm": 0.8418257832527161, + "learning_rate": 8.756502683793366e-07, + "loss": 0.0916, + "step": 985 + }, + { + "epoch": 2.7058823529411766, + "grad_norm": 0.9077285528182983, + "learning_rate": 8.011996974694708e-07, + "loss": 0.0782, + "step": 990 + }, + { + "epoch": 2.719562243502052, + "grad_norm": 0.9103076457977295, + "learning_rate": 7.299695618687357e-07, + "loss": 0.1174, + "step": 995 + }, + { + "epoch": 2.7332421340629276, + "grad_norm": 0.875389039516449, + "learning_rate": 6.619760173113437e-07, + "loss": 0.0948, + "step": 1000 + }, + { + "epoch": 2.746922024623803, + "grad_norm": 0.7634704113006592, + "learning_rate": 5.972344854390482e-07, + "loss": 0.0874, + "step": 1005 + }, + { + "epoch": 2.7606019151846786, + "grad_norm": 0.7932651042938232, + "learning_rate": 5.357596503033773e-07, + "loss": 0.0897, + "step": 1010 + }, + { + "epoch": 2.774281805745554, + "grad_norm": 0.833083987236023, + "learning_rate": 4.775654550351194e-07, + "loss": 0.0959, + "step": 1015 + }, + { + "epoch": 2.7879616963064295, + "grad_norm": 0.8941903114318848, + "learning_rate": 4.2266509868188584e-07, + "loss": 0.0981, + "step": 1020 + }, + { + "epoch": 2.801641586867305, + "grad_norm": 0.7966555953025818, + "learning_rate": 3.7107103321443125e-07, + "loss": 0.0988, + "step": 1025 + }, + { + "epoch": 2.8153214774281805, + "grad_norm": 0.8867335915565491, + "learning_rate": 3.2279496070241053e-07, + "loss": 0.1049, + "step": 1030 + }, + { + "epoch": 2.829001367989056, + "grad_norm": 0.5811817049980164, + "learning_rate": 2.7784783066023553e-07, + "loss": 0.0837, + "step": 1035 + }, + { + "epoch": 2.8426812585499315, + "grad_norm": 0.9008817076683044, + "learning_rate": 2.3623983756359825e-07, + "loss": 0.1031, + "step": 1040 + }, + { + "epoch": 2.856361149110807, + "grad_norm": 0.7333303689956665, + "learning_rate": 1.979804185372802e-07, + "loss": 0.0927, + "step": 1045 + }, + { + "epoch": 2.8700410396716824, + "grad_norm": 0.6272220015525818, + "learning_rate": 1.6307825121469165e-07, + "loss": 0.0793, + "step": 1050 + }, + { + "epoch": 2.883720930232558, + "grad_norm": 0.6819078326225281, + "learning_rate": 1.3154125176970732e-07, + "loss": 0.0853, + "step": 1055 + }, + { + "epoch": 2.8974008207934334, + "grad_norm": 0.8908887505531311, + "learning_rate": 1.0337657312119441e-07, + "loss": 0.1067, + "step": 1060 + }, + { + "epoch": 2.911080711354309, + "grad_norm": 0.8399171233177185, + "learning_rate": 7.859060331065371e-08, + "loss": 0.0926, + "step": 1065 + }, + { + "epoch": 2.924760601915185, + "grad_norm": 0.7101277112960815, + "learning_rate": 5.7188964053345174e-08, + "loss": 0.0849, + "step": 1070 + }, + { + "epoch": 2.9384404924760603, + "grad_norm": 0.6944915056228638, + "learning_rate": 3.9176509463227926e-08, + "loss": 0.0938, + "step": 1075 + }, + { + "epoch": 2.952120383036936, + "grad_norm": 0.8053812384605408, + "learning_rate": 2.4557324951994253e-08, + "loss": 0.0946, + "step": 1080 + }, + { + "epoch": 2.9658002735978113, + "grad_norm": 0.7195825576782227, + "learning_rate": 1.3334726302454136e-08, + "loss": 0.0922, + "step": 1085 + }, + { + "epoch": 2.9794801641586868, + "grad_norm": 0.6881802678108215, + "learning_rate": 5.511258916485185e-09, + "loss": 0.0726, + "step": 1090 + }, + { + "epoch": 2.9931600547195623, + "grad_norm": 0.7327389717102051, + "learning_rate": 1.088697237709435e-09, + "loss": 0.0853, + "step": 1095 + }, + { + "epoch": 3.0, + "step": 1098, + "total_flos": 1.3838964687204516e+18, + "train_loss": 0.43290177936273844, + "train_runtime": 607.2111, + "train_samples_per_second": 57.721, + "train_steps_per_second": 1.808 + } + ], + "logging_steps": 5, + "max_steps": 1098, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.3838964687204516e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa7dd16326f992c16bf6c82abbab66d3e079a8d9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/5_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:094c77f1f7a501172fc38b96f96ed595c29977bd7fac6c6eb1120a405c2e120e +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..36a64abdfaa9336328ef30dd94995d412e15e7c2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 6_128_e3_3e-5 + results: [] +--- + + + +# 6_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7f121f0fcf1f5c4868f63c38f9fedd41a4084643 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "down_proj", + "o_proj", + "q_proj", + "k_proj", + "gate_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4645e094c5d9f45a505141c40ba5e4827860d364 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753073bd226f9f37474562b3b0a97f85a1e6d121ccbe0223445d7d424b24ae72 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c7e3f4e38efa89f38cd2f94c8a90c61a330a3e71 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.560016766921343e+18, + "train_loss": 0.4549595033580045, + "train_runtime": 671.8088, + "train_samples": 12839, + "train_samples_per_second": 57.333, + "train_steps_per_second": 1.795 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c7e3f4e38efa89f38cd2f94c8a90c61a330a3e71 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.560016766921343e+18, + "train_loss": 0.4549595033580045, + "train_runtime": 671.8088, + "train_samples": 12839, + "train_samples_per_second": 57.333, + "train_steps_per_second": 1.795 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..01bb49e39b7f4d5496bc0c822ac0c782d941bd23 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1730 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1206, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012453300124533, + "grad_norm": 0.5887351632118225, + "learning_rate": 1.9672131147540985e-06, + "loss": 1.6273, + "step": 5 + }, + { + "epoch": 0.024906600249066, + "grad_norm": 0.5861614942550659, + "learning_rate": 4.426229508196722e-06, + "loss": 1.6165, + "step": 10 + }, + { + "epoch": 0.037359900373599, + "grad_norm": 0.5567939281463623, + "learning_rate": 6.885245901639345e-06, + "loss": 1.6274, + "step": 15 + }, + { + "epoch": 0.049813200498132, + "grad_norm": 0.48749974370002747, + "learning_rate": 9.344262295081968e-06, + "loss": 1.6392, + "step": 20 + }, + { + "epoch": 0.062266500622665005, + "grad_norm": 0.4769217371940613, + "learning_rate": 1.180327868852459e-05, + "loss": 1.5497, + "step": 25 + }, + { + "epoch": 0.074719800747198, + "grad_norm": 0.47825726866722107, + "learning_rate": 1.4262295081967213e-05, + "loss": 1.5922, + "step": 30 + }, + { + "epoch": 0.08717310087173101, + "grad_norm": 0.44503888487815857, + "learning_rate": 1.6721311475409834e-05, + "loss": 1.6224, + "step": 35 + }, + { + "epoch": 0.099626400996264, + "grad_norm": 0.5163134336471558, + "learning_rate": 1.9180327868852462e-05, + "loss": 1.5423, + "step": 40 + }, + { + "epoch": 0.11207970112079702, + "grad_norm": 0.4368438422679901, + "learning_rate": 2.1639344262295084e-05, + "loss": 1.517, + "step": 45 + }, + { + "epoch": 0.12453300124533001, + "grad_norm": 0.5108548402786255, + "learning_rate": 2.4098360655737705e-05, + "loss": 1.5003, + "step": 50 + }, + { + "epoch": 0.136986301369863, + "grad_norm": 0.5626624822616577, + "learning_rate": 2.6557377049180327e-05, + "loss": 1.5381, + "step": 55 + }, + { + "epoch": 0.149439601494396, + "grad_norm": 0.5296504497528076, + "learning_rate": 2.901639344262295e-05, + "loss": 1.4807, + "step": 60 + }, + { + "epoch": 0.16189290161892902, + "grad_norm": 0.5651581883430481, + "learning_rate": 2.9999491852149543e-05, + "loss": 1.4015, + "step": 65 + }, + { + "epoch": 0.17434620174346202, + "grad_norm": 0.5995021462440491, + "learning_rate": 2.999638662885322e-05, + "loss": 1.3728, + "step": 70 + }, + { + "epoch": 0.18679950186799502, + "grad_norm": 0.5771968960762024, + "learning_rate": 2.9990459070319718e-05, + "loss": 1.3705, + "step": 75 + }, + { + "epoch": 0.199252801992528, + "grad_norm": 0.6429035067558289, + "learning_rate": 2.9981710292121587e-05, + "loss": 1.3279, + "step": 80 + }, + { + "epoch": 0.21170610211706103, + "grad_norm": 0.6657248735427856, + "learning_rate": 2.9970141940787794e-05, + "loss": 1.2972, + "step": 85 + }, + { + "epoch": 0.22415940224159403, + "grad_norm": 0.6264260411262512, + "learning_rate": 2.9955756193493843e-05, + "loss": 1.3109, + "step": 90 + }, + { + "epoch": 0.23661270236612703, + "grad_norm": 0.7197771668434143, + "learning_rate": 2.9938555757652027e-05, + "loss": 1.2803, + "step": 95 + }, + { + "epoch": 0.24906600249066002, + "grad_norm": 0.6735364198684692, + "learning_rate": 2.991854387040189e-05, + "loss": 1.2921, + "step": 100 + }, + { + "epoch": 0.261519302615193, + "grad_norm": 0.7110151052474976, + "learning_rate": 2.9895724298000995e-05, + "loss": 1.2495, + "step": 105 + }, + { + "epoch": 0.273972602739726, + "grad_norm": 0.6756016612052917, + "learning_rate": 2.9870101335116107e-05, + "loss": 1.2148, + "step": 110 + }, + { + "epoch": 0.286425902864259, + "grad_norm": 0.6525706052780151, + "learning_rate": 2.9841679804014938e-05, + "loss": 1.1886, + "step": 115 + }, + { + "epoch": 0.298879202988792, + "grad_norm": 0.802573025226593, + "learning_rate": 2.981046505365859e-05, + "loss": 1.239, + "step": 120 + }, + { + "epoch": 0.31133250311332505, + "grad_norm": 0.701227068901062, + "learning_rate": 2.9776462958694873e-05, + "loss": 1.1768, + "step": 125 + }, + { + "epoch": 0.32378580323785805, + "grad_norm": 0.8699336051940918, + "learning_rate": 2.9739679918352686e-05, + "loss": 1.1699, + "step": 130 + }, + { + "epoch": 0.33623910336239105, + "grad_norm": 0.8620911836624146, + "learning_rate": 2.9700122855237685e-05, + "loss": 1.1097, + "step": 135 + }, + { + "epoch": 0.34869240348692404, + "grad_norm": 0.8736985325813293, + "learning_rate": 2.965779921402944e-05, + "loss": 1.1224, + "step": 140 + }, + { + "epoch": 0.36114570361145704, + "grad_norm": 0.7869607210159302, + "learning_rate": 2.961271696008033e-05, + "loss": 1.1458, + "step": 145 + }, + { + "epoch": 0.37359900373599003, + "grad_norm": 0.7510135769844055, + "learning_rate": 2.9564884577916463e-05, + "loss": 1.0607, + "step": 150 + }, + { + "epoch": 0.386052303860523, + "grad_norm": 0.8835483193397522, + "learning_rate": 2.951431106964088e-05, + "loss": 0.9982, + "step": 155 + }, + { + "epoch": 0.398505603985056, + "grad_norm": 0.8768940567970276, + "learning_rate": 2.9461005953239347e-05, + "loss": 1.0707, + "step": 160 + }, + { + "epoch": 0.410958904109589, + "grad_norm": 1.0027503967285156, + "learning_rate": 2.9404979260789064e-05, + "loss": 0.9947, + "step": 165 + }, + { + "epoch": 0.42341220423412207, + "grad_norm": 1.2167807817459106, + "learning_rate": 2.934624153657061e-05, + "loss": 1.083, + "step": 170 + }, + { + "epoch": 0.43586550435865506, + "grad_norm": 0.8677098751068115, + "learning_rate": 2.9284803835083507e-05, + "loss": 0.8912, + "step": 175 + }, + { + "epoch": 0.44831880448318806, + "grad_norm": 1.118492841720581, + "learning_rate": 2.9220677718965747e-05, + "loss": 0.9974, + "step": 180 + }, + { + "epoch": 0.46077210460772106, + "grad_norm": 1.2271910905838013, + "learning_rate": 2.9153875256817696e-05, + "loss": 0.9632, + "step": 185 + }, + { + "epoch": 0.47322540473225405, + "grad_norm": 0.9266152381896973, + "learning_rate": 2.9084409020930767e-05, + "loss": 0.955, + "step": 190 + }, + { + "epoch": 0.48567870485678705, + "grad_norm": 0.9245381355285645, + "learning_rate": 2.9012292084921306e-05, + "loss": 0.9236, + "step": 195 + }, + { + "epoch": 0.49813200498132004, + "grad_norm": 1.0813387632369995, + "learning_rate": 2.893753802127012e-05, + "loss": 0.9103, + "step": 200 + }, + { + "epoch": 0.5105853051058531, + "grad_norm": 1.077231764793396, + "learning_rate": 2.8860160898768123e-05, + "loss": 0.8645, + "step": 205 + }, + { + "epoch": 0.523038605230386, + "grad_norm": 1.2116554975509644, + "learning_rate": 2.8780175279868577e-05, + "loss": 0.9134, + "step": 210 + }, + { + "epoch": 0.5354919053549191, + "grad_norm": 1.053807020187378, + "learning_rate": 2.8697596217946426e-05, + "loss": 0.8685, + "step": 215 + }, + { + "epoch": 0.547945205479452, + "grad_norm": 1.252885103225708, + "learning_rate": 2.861243925446523e-05, + "loss": 0.8838, + "step": 220 + }, + { + "epoch": 0.5603985056039851, + "grad_norm": 1.0999681949615479, + "learning_rate": 2.8524720416052243e-05, + "loss": 0.8954, + "step": 225 + }, + { + "epoch": 0.572851805728518, + "grad_norm": 1.0925804376602173, + "learning_rate": 2.84344562114822e-05, + "loss": 0.8012, + "step": 230 + }, + { + "epoch": 0.5853051058530511, + "grad_norm": 1.252159595489502, + "learning_rate": 2.8341663628570328e-05, + "loss": 0.8414, + "step": 235 + }, + { + "epoch": 0.597758405977584, + "grad_norm": 1.0814017057418823, + "learning_rate": 2.824636013097524e-05, + "loss": 0.7283, + "step": 240 + }, + { + "epoch": 0.6102117061021171, + "grad_norm": 1.1314330101013184, + "learning_rate": 2.8148563654912257e-05, + "loss": 0.7679, + "step": 245 + }, + { + "epoch": 0.6226650062266501, + "grad_norm": 1.1451290845870972, + "learning_rate": 2.8048292605777766e-05, + "loss": 0.8084, + "step": 250 + }, + { + "epoch": 0.635118306351183, + "grad_norm": 1.1520881652832031, + "learning_rate": 2.7945565854685348e-05, + "loss": 0.7462, + "step": 255 + }, + { + "epoch": 0.6475716064757161, + "grad_norm": 1.1300252676010132, + "learning_rate": 2.7840402734914182e-05, + "loss": 0.7216, + "step": 260 + }, + { + "epoch": 0.660024906600249, + "grad_norm": 1.094291090965271, + "learning_rate": 2.773282303827052e-05, + "loss": 0.7772, + "step": 265 + }, + { + "epoch": 0.6724782067247821, + "grad_norm": 1.0885318517684937, + "learning_rate": 2.762284701136283e-05, + "loss": 0.7215, + "step": 270 + }, + { + "epoch": 0.684931506849315, + "grad_norm": 1.1558951139450073, + "learning_rate": 2.7510495351791397e-05, + "loss": 0.7029, + "step": 275 + }, + { + "epoch": 0.6973848069738481, + "grad_norm": 1.2189788818359375, + "learning_rate": 2.739578920425297e-05, + "loss": 0.6917, + "step": 280 + }, + { + "epoch": 0.709838107098381, + "grad_norm": 1.3507041931152344, + "learning_rate": 2.727875015656135e-05, + "loss": 0.6633, + "step": 285 + }, + { + "epoch": 0.7222914072229141, + "grad_norm": 1.2005877494812012, + "learning_rate": 2.7159400235584507e-05, + "loss": 0.6617, + "step": 290 + }, + { + "epoch": 0.7347447073474471, + "grad_norm": 1.2132086753845215, + "learning_rate": 2.703776190309914e-05, + "loss": 0.6823, + "step": 295 + }, + { + "epoch": 0.7471980074719801, + "grad_norm": 1.3106050491333008, + "learning_rate": 2.691385805156329e-05, + "loss": 0.642, + "step": 300 + }, + { + "epoch": 0.7596513075965131, + "grad_norm": 1.1777048110961914, + "learning_rate": 2.6787711999808026e-05, + "loss": 0.6697, + "step": 305 + }, + { + "epoch": 0.772104607721046, + "grad_norm": 1.1168150901794434, + "learning_rate": 2.6659347488648763e-05, + "loss": 0.6249, + "step": 310 + }, + { + "epoch": 0.7845579078455791, + "grad_norm": 1.141383409500122, + "learning_rate": 2.6528788676417238e-05, + "loss": 0.6913, + "step": 315 + }, + { + "epoch": 0.797011207970112, + "grad_norm": 1.2193230390548706, + "learning_rate": 2.6396060134414883e-05, + "loss": 0.6268, + "step": 320 + }, + { + "epoch": 0.8094645080946451, + "grad_norm": 1.2030247449874878, + "learning_rate": 2.6261186842288482e-05, + "loss": 0.6232, + "step": 325 + }, + { + "epoch": 0.821917808219178, + "grad_norm": 1.2804595232009888, + "learning_rate": 2.6124194183328992e-05, + "loss": 0.5997, + "step": 330 + }, + { + "epoch": 0.8343711083437111, + "grad_norm": 1.1899851560592651, + "learning_rate": 2.5985107939694346e-05, + "loss": 0.6181, + "step": 335 + }, + { + "epoch": 0.8468244084682441, + "grad_norm": 1.252703309059143, + "learning_rate": 2.5843954287557253e-05, + "loss": 0.5855, + "step": 340 + }, + { + "epoch": 0.8592777085927771, + "grad_norm": 1.624681830406189, + "learning_rate": 2.5700759792178813e-05, + "loss": 0.5577, + "step": 345 + }, + { + "epoch": 0.8717310087173101, + "grad_norm": 1.327043890953064, + "learning_rate": 2.5555551402908896e-05, + "loss": 0.595, + "step": 350 + }, + { + "epoch": 0.8841843088418431, + "grad_norm": 1.6150704622268677, + "learning_rate": 2.5408356448114255e-05, + "loss": 0.5415, + "step": 355 + }, + { + "epoch": 0.8966376089663761, + "grad_norm": 1.3825650215148926, + "learning_rate": 2.5259202630035296e-05, + "loss": 0.6395, + "step": 360 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 1.2688605785369873, + "learning_rate": 2.51081180195725e-05, + "loss": 0.5341, + "step": 365 + }, + { + "epoch": 0.9215442092154421, + "grad_norm": 1.2397621870040894, + "learning_rate": 2.4955131051003427e-05, + "loss": 0.5542, + "step": 370 + }, + { + "epoch": 0.933997509339975, + "grad_norm": 1.3171952962875366, + "learning_rate": 2.4800270516631376e-05, + "loss": 0.568, + "step": 375 + }, + { + "epoch": 0.9464508094645081, + "grad_norm": 1.2496514320373535, + "learning_rate": 2.4643565561366644e-05, + "loss": 0.4892, + "step": 380 + }, + { + "epoch": 0.958904109589041, + "grad_norm": 1.3866286277770996, + "learning_rate": 2.4485045677241415e-05, + "loss": 0.5666, + "step": 385 + }, + { + "epoch": 0.9713574097135741, + "grad_norm": 1.5077458620071411, + "learning_rate": 2.4324740697859326e-05, + "loss": 0.522, + "step": 390 + }, + { + "epoch": 0.9838107098381071, + "grad_norm": 1.2388253211975098, + "learning_rate": 2.4162680792780775e-05, + "loss": 0.5269, + "step": 395 + }, + { + "epoch": 0.9962640099626401, + "grad_norm": 1.1950420141220093, + "learning_rate": 2.399889646184494e-05, + "loss": 0.5274, + "step": 400 + }, + { + "epoch": 1.0074719800747198, + "grad_norm": 1.305106520652771, + "learning_rate": 2.3833418529429728e-05, + "loss": 0.4687, + "step": 405 + }, + { + "epoch": 1.0199252801992529, + "grad_norm": 1.2299377918243408, + "learning_rate": 2.366627813865055e-05, + "loss": 0.4443, + "step": 410 + }, + { + "epoch": 1.0323785803237857, + "grad_norm": 1.2517437934875488, + "learning_rate": 2.349750674549918e-05, + "loss": 0.4062, + "step": 415 + }, + { + "epoch": 1.0448318804483188, + "grad_norm": 1.3392006158828735, + "learning_rate": 2.332713611292371e-05, + "loss": 0.4805, + "step": 420 + }, + { + "epoch": 1.0572851805728518, + "grad_norm": 1.4506292343139648, + "learning_rate": 2.3155198304850694e-05, + "loss": 0.4205, + "step": 425 + }, + { + "epoch": 1.0697384806973849, + "grad_norm": 1.1956017017364502, + "learning_rate": 2.2981725680150745e-05, + "loss": 0.4168, + "step": 430 + }, + { + "epoch": 1.0821917808219177, + "grad_norm": 1.1639238595962524, + "learning_rate": 2.2806750886548508e-05, + "loss": 0.4414, + "step": 435 + }, + { + "epoch": 1.0946450809464507, + "grad_norm": 1.5714173316955566, + "learning_rate": 2.2630306854478335e-05, + "loss": 0.4443, + "step": 440 + }, + { + "epoch": 1.1070983810709838, + "grad_norm": 1.1216843128204346, + "learning_rate": 2.245242679088679e-05, + "loss": 0.3695, + "step": 445 + }, + { + "epoch": 1.1195516811955168, + "grad_norm": 1.283553957939148, + "learning_rate": 2.2273144172982985e-05, + "loss": 0.3814, + "step": 450 + }, + { + "epoch": 1.13200498132005, + "grad_norm": 1.2081447839736938, + "learning_rate": 2.2092492741938222e-05, + "loss": 0.4424, + "step": 455 + }, + { + "epoch": 1.1444582814445827, + "grad_norm": 1.4226611852645874, + "learning_rate": 2.1910506496535816e-05, + "loss": 0.4051, + "step": 460 + }, + { + "epoch": 1.1569115815691158, + "grad_norm": 1.2933145761489868, + "learning_rate": 2.1727219686772494e-05, + "loss": 0.4304, + "step": 465 + }, + { + "epoch": 1.1693648816936488, + "grad_norm": 1.230491280555725, + "learning_rate": 2.154266680741253e-05, + "loss": 0.3755, + "step": 470 + }, + { + "epoch": 1.1818181818181819, + "grad_norm": 1.228371500968933, + "learning_rate": 2.1356882591495795e-05, + "loss": 0.3426, + "step": 475 + }, + { + "epoch": 1.1942714819427147, + "grad_norm": 1.3170684576034546, + "learning_rate": 2.116990200380093e-05, + "loss": 0.3736, + "step": 480 + }, + { + "epoch": 1.2067247820672478, + "grad_norm": 1.4409031867980957, + "learning_rate": 2.0981760234264983e-05, + "loss": 0.3659, + "step": 485 + }, + { + "epoch": 1.2191780821917808, + "grad_norm": 1.3675901889801025, + "learning_rate": 2.07924926913606e-05, + "loss": 0.3461, + "step": 490 + }, + { + "epoch": 1.2316313823163139, + "grad_norm": 1.229503870010376, + "learning_rate": 2.0602134995432124e-05, + "loss": 0.3257, + "step": 495 + }, + { + "epoch": 1.244084682440847, + "grad_norm": 1.319492220878601, + "learning_rate": 2.0410722971991802e-05, + "loss": 0.3209, + "step": 500 + }, + { + "epoch": 1.25653798256538, + "grad_norm": 1.5138757228851318, + "learning_rate": 2.0218292644977396e-05, + "loss": 0.3512, + "step": 505 + }, + { + "epoch": 1.2689912826899128, + "grad_norm": 1.1985795497894287, + "learning_rate": 2.002488022997244e-05, + "loss": 0.3943, + "step": 510 + }, + { + "epoch": 1.2814445828144458, + "grad_norm": 1.4961262941360474, + "learning_rate": 1.9830522127390428e-05, + "loss": 0.35, + "step": 515 + }, + { + "epoch": 1.293897882938979, + "grad_norm": 1.3445194959640503, + "learning_rate": 1.963525491562421e-05, + "loss": 0.3209, + "step": 520 + }, + { + "epoch": 1.3063511830635117, + "grad_norm": 1.3593227863311768, + "learning_rate": 1.943911534416193e-05, + "loss": 0.3398, + "step": 525 + }, + { + "epoch": 1.3188044831880448, + "grad_norm": 1.4715900421142578, + "learning_rate": 1.924214032667069e-05, + "loss": 0.3395, + "step": 530 + }, + { + "epoch": 1.3312577833125778, + "grad_norm": 1.3314645290374756, + "learning_rate": 1.9044366934049408e-05, + "loss": 0.346, + "step": 535 + }, + { + "epoch": 1.3437110834371109, + "grad_norm": 1.4558959007263184, + "learning_rate": 1.8845832387451995e-05, + "loss": 0.3173, + "step": 540 + }, + { + "epoch": 1.356164383561644, + "grad_norm": 1.3109192848205566, + "learning_rate": 1.8646574051282337e-05, + "loss": 0.3424, + "step": 545 + }, + { + "epoch": 1.3686176836861768, + "grad_norm": 1.42848801612854, + "learning_rate": 1.844662942616224e-05, + "loss": 0.3087, + "step": 550 + }, + { + "epoch": 1.3810709838107098, + "grad_norm": 1.265738844871521, + "learning_rate": 1.8246036141873786e-05, + "loss": 0.3188, + "step": 555 + }, + { + "epoch": 1.3935242839352429, + "grad_norm": 1.316454291343689, + "learning_rate": 1.804483195027739e-05, + "loss": 0.2852, + "step": 560 + }, + { + "epoch": 1.405977584059776, + "grad_norm": 1.4138931035995483, + "learning_rate": 1.7843054718206818e-05, + "loss": 0.2729, + "step": 565 + }, + { + "epoch": 1.4184308841843087, + "grad_norm": 1.3249763250350952, + "learning_rate": 1.7640742420342672e-05, + "loss": 0.3233, + "step": 570 + }, + { + "epoch": 1.4308841843088418, + "grad_norm": 1.2381597757339478, + "learning_rate": 1.7437933132065452e-05, + "loss": 0.2656, + "step": 575 + }, + { + "epoch": 1.4433374844333748, + "grad_norm": 1.3339953422546387, + "learning_rate": 1.7234665022289777e-05, + "loss": 0.2878, + "step": 580 + }, + { + "epoch": 1.455790784557908, + "grad_norm": 1.5167180299758911, + "learning_rate": 1.7030976346280924e-05, + "loss": 0.2799, + "step": 585 + }, + { + "epoch": 1.468244084682441, + "grad_norm": 1.3256525993347168, + "learning_rate": 1.6826905438455174e-05, + "loss": 0.2983, + "step": 590 + }, + { + "epoch": 1.4806973848069738, + "grad_norm": 1.3447959423065186, + "learning_rate": 1.662249070516523e-05, + "loss": 0.2726, + "step": 595 + }, + { + "epoch": 1.4931506849315068, + "grad_norm": 1.3739451169967651, + "learning_rate": 1.641777061747209e-05, + "loss": 0.285, + "step": 600 + }, + { + "epoch": 1.5056039850560399, + "grad_norm": 1.2761503458023071, + "learning_rate": 1.621278370390476e-05, + "loss": 0.2526, + "step": 605 + }, + { + "epoch": 1.5180572851805727, + "grad_norm": 1.5421847105026245, + "learning_rate": 1.6007568543209153e-05, + "loss": 0.2881, + "step": 610 + }, + { + "epoch": 1.5305105853051058, + "grad_norm": 1.4516876935958862, + "learning_rate": 1.5802163757087513e-05, + "loss": 0.2596, + "step": 615 + }, + { + "epoch": 1.5429638854296388, + "grad_norm": 1.2657066583633423, + "learning_rate": 1.5596608002929793e-05, + "loss": 0.2837, + "step": 620 + }, + { + "epoch": 1.5554171855541719, + "grad_norm": 1.3412666320800781, + "learning_rate": 1.539093996653829e-05, + "loss": 0.2587, + "step": 625 + }, + { + "epoch": 1.567870485678705, + "grad_norm": 1.131179690361023, + "learning_rate": 1.518519835484691e-05, + "loss": 0.2371, + "step": 630 + }, + { + "epoch": 1.580323785803238, + "grad_norm": 1.1707836389541626, + "learning_rate": 1.4979421888636532e-05, + "loss": 0.23, + "step": 635 + }, + { + "epoch": 1.592777085927771, + "grad_norm": 1.3001599311828613, + "learning_rate": 1.4773649295247668e-05, + "loss": 0.2711, + "step": 640 + }, + { + "epoch": 1.6052303860523038, + "grad_norm": 1.0576181411743164, + "learning_rate": 1.4567919301291976e-05, + "loss": 0.2365, + "step": 645 + }, + { + "epoch": 1.6176836861768369, + "grad_norm": 1.3261315822601318, + "learning_rate": 1.4362270625363852e-05, + "loss": 0.2901, + "step": 650 + }, + { + "epoch": 1.6301369863013697, + "grad_norm": 1.6010857820510864, + "learning_rate": 1.415674197075355e-05, + "loss": 0.2714, + "step": 655 + }, + { + "epoch": 1.6425902864259028, + "grad_norm": 1.216659665107727, + "learning_rate": 1.3951372018163197e-05, + "loss": 0.2623, + "step": 660 + }, + { + "epoch": 1.6550435865504358, + "grad_norm": 1.1736416816711426, + "learning_rate": 1.3746199418427044e-05, + "loss": 0.2352, + "step": 665 + }, + { + "epoch": 1.6674968866749689, + "grad_norm": 1.513901948928833, + "learning_rate": 1.3541262785237321e-05, + "loss": 0.2395, + "step": 670 + }, + { + "epoch": 1.679950186799502, + "grad_norm": 1.2721256017684937, + "learning_rate": 1.3336600687877124e-05, + "loss": 0.2158, + "step": 675 + }, + { + "epoch": 1.692403486924035, + "grad_norm": 1.408715009689331, + "learning_rate": 1.313225164396162e-05, + "loss": 0.2313, + "step": 680 + }, + { + "epoch": 1.704856787048568, + "grad_norm": 1.4273520708084106, + "learning_rate": 1.2928254112189e-05, + "loss": 0.2444, + "step": 685 + }, + { + "epoch": 1.7173100871731009, + "grad_norm": 1.2238917350769043, + "learning_rate": 1.272464648510251e-05, + "loss": 0.1898, + "step": 690 + }, + { + "epoch": 1.729763387297634, + "grad_norm": 1.3300899267196655, + "learning_rate": 1.2521467081864945e-05, + "loss": 0.2163, + "step": 695 + }, + { + "epoch": 1.7422166874221667, + "grad_norm": 1.264064908027649, + "learning_rate": 1.2318754141046936e-05, + "loss": 0.2211, + "step": 700 + }, + { + "epoch": 1.7546699875466998, + "grad_norm": 1.4204264879226685, + "learning_rate": 1.211654581343039e-05, + "loss": 0.2177, + "step": 705 + }, + { + "epoch": 1.7671232876712328, + "grad_norm": 1.4684950113296509, + "learning_rate": 1.1914880154828514e-05, + "loss": 0.2272, + "step": 710 + }, + { + "epoch": 1.7795765877957659, + "grad_norm": 1.3195148706436157, + "learning_rate": 1.1713795118923659e-05, + "loss": 0.2372, + "step": 715 + }, + { + "epoch": 1.792029887920299, + "grad_norm": 1.2937626838684082, + "learning_rate": 1.1513328550124379e-05, + "loss": 0.2144, + "step": 720 + }, + { + "epoch": 1.804483188044832, + "grad_norm": 1.5575698614120483, + "learning_rate": 1.1313518176443099e-05, + "loss": 0.2093, + "step": 725 + }, + { + "epoch": 1.816936488169365, + "grad_norm": 1.2794543504714966, + "learning_rate": 1.1114401602395647e-05, + "loss": 0.1692, + "step": 730 + }, + { + "epoch": 1.8293897882938979, + "grad_norm": 1.3758814334869385, + "learning_rate": 1.0916016301924056e-05, + "loss": 0.2155, + "step": 735 + }, + { + "epoch": 1.841843088418431, + "grad_norm": 1.138843059539795, + "learning_rate": 1.071839961134393e-05, + "loss": 0.1879, + "step": 740 + }, + { + "epoch": 1.8542963885429637, + "grad_norm": 1.3463490009307861, + "learning_rate": 1.0521588722317707e-05, + "loss": 0.1728, + "step": 745 + }, + { + "epoch": 1.8667496886674968, + "grad_norm": 1.3083215951919556, + "learning_rate": 1.0325620674855147e-05, + "loss": 0.1784, + "step": 750 + }, + { + "epoch": 1.8792029887920298, + "grad_norm": 1.1345603466033936, + "learning_rate": 1.0130532350342381e-05, + "loss": 0.1833, + "step": 755 + }, + { + "epoch": 1.891656288916563, + "grad_norm": 1.1642392873764038, + "learning_rate": 9.936360464600769e-06, + "loss": 0.1796, + "step": 760 + }, + { + "epoch": 1.904109589041096, + "grad_norm": 1.0691404342651367, + "learning_rate": 9.74314156097697e-06, + "loss": 0.1876, + "step": 765 + }, + { + "epoch": 1.916562889165629, + "grad_norm": 1.2828145027160645, + "learning_rate": 9.550912003465442e-06, + "loss": 0.1796, + "step": 770 + }, + { + "epoch": 1.929016189290162, + "grad_norm": 1.124238133430481, + "learning_rate": 9.359707969864688e-06, + "loss": 0.1601, + "step": 775 + }, + { + "epoch": 1.9414694894146949, + "grad_norm": 1.2855015993118286, + "learning_rate": 9.16956544496857e-06, + "loss": 0.1671, + "step": 780 + }, + { + "epoch": 1.953922789539228, + "grad_norm": 1.425328254699707, + "learning_rate": 8.980520213793934e-06, + "loss": 0.1681, + "step": 785 + }, + { + "epoch": 1.9663760896637608, + "grad_norm": 1.1696804761886597, + "learning_rate": 8.792607854845829e-06, + "loss": 0.1712, + "step": 790 + }, + { + "epoch": 1.9788293897882938, + "grad_norm": 1.2211707830429077, + "learning_rate": 8.605863733421594e-06, + "loss": 0.1757, + "step": 795 + }, + { + "epoch": 1.9912826899128269, + "grad_norm": 1.321195363998413, + "learning_rate": 8.420322994955074e-06, + "loss": 0.1784, + "step": 800 + }, + { + "epoch": 2.0024906600249066, + "grad_norm": 1.0960582494735718, + "learning_rate": 8.236020558402222e-06, + "loss": 0.1597, + "step": 805 + }, + { + "epoch": 2.0149439601494397, + "grad_norm": 1.2663432359695435, + "learning_rate": 8.052991109669306e-06, + "loss": 0.1355, + "step": 810 + }, + { + "epoch": 2.0273972602739727, + "grad_norm": 1.3045967817306519, + "learning_rate": 7.87126909508499e-06, + "loss": 0.1338, + "step": 815 + }, + { + "epoch": 2.0398505603985058, + "grad_norm": 1.0567882061004639, + "learning_rate": 7.690888714917507e-06, + "loss": 0.1473, + "step": 820 + }, + { + "epoch": 2.052303860523039, + "grad_norm": 1.128483772277832, + "learning_rate": 7.511883916938109e-06, + "loss": 0.1369, + "step": 825 + }, + { + "epoch": 2.0647571606475714, + "grad_norm": 1.1974595785140991, + "learning_rate": 7.334288390032098e-06, + "loss": 0.1219, + "step": 830 + }, + { + "epoch": 2.0772104607721045, + "grad_norm": 1.4451351165771484, + "learning_rate": 7.158135557858515e-06, + "loss": 0.1302, + "step": 835 + }, + { + "epoch": 2.0896637608966375, + "grad_norm": 1.0159631967544556, + "learning_rate": 6.983458572559782e-06, + "loss": 0.128, + "step": 840 + }, + { + "epoch": 2.1021170610211706, + "grad_norm": 0.9308987855911255, + "learning_rate": 6.81029030852244e-06, + "loss": 0.1312, + "step": 845 + }, + { + "epoch": 2.1145703611457036, + "grad_norm": 1.1019511222839355, + "learning_rate": 6.63866335619015e-06, + "loss": 0.1248, + "step": 850 + }, + { + "epoch": 2.1270236612702367, + "grad_norm": 1.157514214515686, + "learning_rate": 6.468610015930143e-06, + "loss": 0.1177, + "step": 855 + }, + { + "epoch": 2.1394769613947697, + "grad_norm": 1.0707346200942993, + "learning_rate": 6.3001622919542495e-06, + "loss": 0.1184, + "step": 860 + }, + { + "epoch": 2.151930261519303, + "grad_norm": 1.2475167512893677, + "learning_rate": 6.133351886295691e-06, + "loss": 0.1127, + "step": 865 + }, + { + "epoch": 2.1643835616438354, + "grad_norm": 1.044665813446045, + "learning_rate": 5.9682101928426966e-06, + "loss": 0.1357, + "step": 870 + }, + { + "epoch": 2.1768368617683684, + "grad_norm": 1.1343681812286377, + "learning_rate": 5.804768291430174e-06, + "loss": 0.1243, + "step": 875 + }, + { + "epoch": 2.1892901618929015, + "grad_norm": 1.130814552307129, + "learning_rate": 5.643056941990433e-06, + "loss": 0.1237, + "step": 880 + }, + { + "epoch": 2.2017434620174345, + "grad_norm": 1.172777533531189, + "learning_rate": 5.483106578764136e-06, + "loss": 0.1081, + "step": 885 + }, + { + "epoch": 2.2141967621419676, + "grad_norm": 1.1048487424850464, + "learning_rate": 5.324947304572553e-06, + "loss": 0.1182, + "step": 890 + }, + { + "epoch": 2.2266500622665006, + "grad_norm": 1.1731038093566895, + "learning_rate": 5.1686088851521685e-06, + "loss": 0.1173, + "step": 895 + }, + { + "epoch": 2.2391033623910337, + "grad_norm": 1.374415397644043, + "learning_rate": 5.014120743552749e-06, + "loss": 0.1201, + "step": 900 + }, + { + "epoch": 2.2515566625155667, + "grad_norm": 1.069021224975586, + "learning_rate": 4.861511954599883e-06, + "loss": 0.1149, + "step": 905 + }, + { + "epoch": 2.2640099626401, + "grad_norm": 1.1398966312408447, + "learning_rate": 4.710811239423083e-06, + "loss": 0.1107, + "step": 910 + }, + { + "epoch": 2.276463262764633, + "grad_norm": 1.037116289138794, + "learning_rate": 4.5620469600504355e-06, + "loss": 0.1167, + "step": 915 + }, + { + "epoch": 2.2889165628891655, + "grad_norm": 1.137193202972412, + "learning_rate": 4.415247114070834e-06, + "loss": 0.1234, + "step": 920 + }, + { + "epoch": 2.3013698630136985, + "grad_norm": 0.9118360280990601, + "learning_rate": 4.270439329364799e-06, + "loss": 0.1105, + "step": 925 + }, + { + "epoch": 2.3138231631382316, + "grad_norm": 1.1188533306121826, + "learning_rate": 4.1276508589048986e-06, + "loss": 0.129, + "step": 930 + }, + { + "epoch": 2.3262764632627646, + "grad_norm": 1.04951810836792, + "learning_rate": 3.986908575626699e-06, + "loss": 0.1124, + "step": 935 + }, + { + "epoch": 2.3387297633872977, + "grad_norm": 0.8866438269615173, + "learning_rate": 3.848238967371265e-06, + "loss": 0.1081, + "step": 940 + }, + { + "epoch": 2.3511830635118307, + "grad_norm": 1.2120109796524048, + "learning_rate": 3.7116681319001018e-06, + "loss": 0.1165, + "step": 945 + }, + { + "epoch": 2.3636363636363638, + "grad_norm": 1.0470192432403564, + "learning_rate": 3.5772217719835384e-06, + "loss": 0.1078, + "step": 950 + }, + { + "epoch": 2.376089663760897, + "grad_norm": 0.8818226456642151, + "learning_rate": 3.444925190563445e-06, + "loss": 0.106, + "step": 955 + }, + { + "epoch": 2.3885429638854294, + "grad_norm": 0.7554528117179871, + "learning_rate": 3.3148032859911844e-06, + "loss": 0.0903, + "step": 960 + }, + { + "epoch": 2.4009962640099625, + "grad_norm": 0.8347125053405762, + "learning_rate": 3.186880547341727e-06, + "loss": 0.1028, + "step": 965 + }, + { + "epoch": 2.4134495641344955, + "grad_norm": 0.884638249874115, + "learning_rate": 3.0611810498047742e-06, + "loss": 0.0992, + "step": 970 + }, + { + "epoch": 2.4259028642590286, + "grad_norm": 1.009537935256958, + "learning_rate": 2.937728450153789e-06, + "loss": 0.1085, + "step": 975 + }, + { + "epoch": 2.4383561643835616, + "grad_norm": 0.8654739260673523, + "learning_rate": 2.816545982293752e-06, + "loss": 0.1073, + "step": 980 + }, + { + "epoch": 2.4508094645080947, + "grad_norm": 0.8862403035163879, + "learning_rate": 2.6976564528885422e-06, + "loss": 0.1091, + "step": 985 + }, + { + "epoch": 2.4632627646326277, + "grad_norm": 0.823351263999939, + "learning_rate": 2.5810822370686804e-06, + "loss": 0.0869, + "step": 990 + }, + { + "epoch": 2.4757160647571608, + "grad_norm": 1.0282278060913086, + "learning_rate": 2.466845274220316e-06, + "loss": 0.0953, + "step": 995 + }, + { + "epoch": 2.488169364881694, + "grad_norm": 0.826828122138977, + "learning_rate": 2.3549670638562016e-06, + "loss": 0.102, + "step": 1000 + }, + { + "epoch": 2.500622665006227, + "grad_norm": 0.8146514892578125, + "learning_rate": 2.2454686615694785e-06, + "loss": 0.1114, + "step": 1005 + }, + { + "epoch": 2.51307596513076, + "grad_norm": 0.8835738897323608, + "learning_rate": 2.138370675070977e-06, + "loss": 0.113, + "step": 1010 + }, + { + "epoch": 2.5255292652552925, + "grad_norm": 0.7949194312095642, + "learning_rate": 2.0336932603108355e-06, + "loss": 0.0932, + "step": 1015 + }, + { + "epoch": 2.5379825653798256, + "grad_norm": 0.8488854765892029, + "learning_rate": 1.9314561176851235e-06, + "loss": 0.114, + "step": 1020 + }, + { + "epoch": 2.5504358655043586, + "grad_norm": 0.9035933017730713, + "learning_rate": 1.8316784883282105e-06, + "loss": 0.0881, + "step": 1025 + }, + { + "epoch": 2.5628891656288917, + "grad_norm": 0.7956448197364807, + "learning_rate": 1.7343791504915684e-06, + "loss": 0.1052, + "step": 1030 + }, + { + "epoch": 2.5753424657534247, + "grad_norm": 0.7557389140129089, + "learning_rate": 1.6395764160096678e-06, + "loss": 0.1045, + "step": 1035 + }, + { + "epoch": 2.587795765877958, + "grad_norm": 1.004542350769043, + "learning_rate": 1.547288126853697e-06, + "loss": 0.1028, + "step": 1040 + }, + { + "epoch": 2.6002490660024904, + "grad_norm": 0.8218663930892944, + "learning_rate": 1.4575316517736714e-06, + "loss": 0.1213, + "step": 1045 + }, + { + "epoch": 2.6127023661270234, + "grad_norm": 1.056375503540039, + "learning_rate": 1.370323883029615e-06, + "loss": 0.1215, + "step": 1050 + }, + { + "epoch": 2.6251556662515565, + "grad_norm": 0.9413686394691467, + "learning_rate": 1.2856812332124274e-06, + "loss": 0.0899, + "step": 1055 + }, + { + "epoch": 2.6376089663760895, + "grad_norm": 0.8530963659286499, + "learning_rate": 1.2036196321550096e-06, + "loss": 0.1032, + "step": 1060 + }, + { + "epoch": 2.6500622665006226, + "grad_norm": 0.818458080291748, + "learning_rate": 1.1241545239342609e-06, + "loss": 0.1003, + "step": 1065 + }, + { + "epoch": 2.6625155666251556, + "grad_norm": 0.8720191717147827, + "learning_rate": 1.0473008639644814e-06, + "loss": 0.1026, + "step": 1070 + }, + { + "epoch": 2.6749688667496887, + "grad_norm": 0.9751065969467163, + "learning_rate": 9.730731161827528e-07, + "loss": 0.111, + "step": 1075 + }, + { + "epoch": 2.6874221668742218, + "grad_norm": 0.7011237740516663, + "learning_rate": 9.014852503268045e-07, + "loss": 0.0848, + "step": 1080 + }, + { + "epoch": 2.699875466998755, + "grad_norm": 0.6524854898452759, + "learning_rate": 8.325507393059101e-07, + "loss": 0.0978, + "step": 1085 + }, + { + "epoch": 2.712328767123288, + "grad_norm": 0.6677658557891846, + "learning_rate": 7.662825566652442e-07, + "loss": 0.0935, + "step": 1090 + }, + { + "epoch": 2.724782067247821, + "grad_norm": 0.7752848267555237, + "learning_rate": 7.026931741442783e-07, + "loss": 0.1025, + "step": 1095 + }, + { + "epoch": 2.7372353673723535, + "grad_norm": 0.8180628418922424, + "learning_rate": 6.417945593295638e-07, + "loss": 0.0832, + "step": 1100 + }, + { + "epoch": 2.7496886674968866, + "grad_norm": 0.7428537011146545, + "learning_rate": 5.835981734024348e-07, + "loss": 0.0923, + "step": 1105 + }, + { + "epoch": 2.7621419676214196, + "grad_norm": 0.8529137969017029, + "learning_rate": 5.281149689819981e-07, + "loss": 0.1007, + "step": 1110 + }, + { + "epoch": 2.7745952677459527, + "grad_norm": 0.750052273273468, + "learning_rate": 4.7535538806383006e-07, + "loss": 0.0913, + "step": 1115 + }, + { + "epoch": 2.7870485678704857, + "grad_norm": 0.9454846978187561, + "learning_rate": 4.2532936005479585e-07, + "loss": 0.0948, + "step": 1120 + }, + { + "epoch": 2.7995018679950188, + "grad_norm": 0.6916345953941345, + "learning_rate": 3.7804629990431884e-07, + "loss": 0.0839, + "step": 1125 + }, + { + "epoch": 2.811955168119552, + "grad_norm": 0.6618907451629639, + "learning_rate": 3.335151063324765e-07, + "loss": 0.0925, + "step": 1130 + }, + { + "epoch": 2.8244084682440844, + "grad_norm": 0.8401079773902893, + "learning_rate": 2.917441601552534e-07, + "loss": 0.087, + "step": 1135 + }, + { + "epoch": 2.8368617683686175, + "grad_norm": 0.7110039591789246, + "learning_rate": 2.527413227072628e-07, + "loss": 0.0831, + "step": 1140 + }, + { + "epoch": 2.8493150684931505, + "grad_norm": 1.1816599369049072, + "learning_rate": 2.165139343622352e-07, + "loss": 0.0954, + "step": 1145 + }, + { + "epoch": 2.8617683686176836, + "grad_norm": 0.7376044988632202, + "learning_rate": 1.830688131515551e-07, + "loss": 0.0929, + "step": 1150 + }, + { + "epoch": 2.8742216687422166, + "grad_norm": 0.873285710811615, + "learning_rate": 1.5241225348109898e-07, + "loss": 0.1029, + "step": 1155 + }, + { + "epoch": 2.8866749688667497, + "grad_norm": 0.6638308763504028, + "learning_rate": 1.2455002494661972e-07, + "loss": 0.0992, + "step": 1160 + }, + { + "epoch": 2.8991282689912827, + "grad_norm": 0.75803142786026, + "learning_rate": 9.948737124790331e-08, + "loss": 0.0825, + "step": 1165 + }, + { + "epoch": 2.911581569115816, + "grad_norm": 0.8235825300216675, + "learning_rate": 7.722900920190179e-08, + "loss": 0.0947, + "step": 1170 + }, + { + "epoch": 2.924034869240349, + "grad_norm": 0.7790074348449707, + "learning_rate": 5.777912785502493e-08, + "loss": 0.1019, + "step": 1175 + }, + { + "epoch": 2.936488169364882, + "grad_norm": 0.7766782641410828, + "learning_rate": 4.114138769474918e-08, + "loss": 0.1061, + "step": 1180 + }, + { + "epoch": 2.948941469489415, + "grad_norm": 0.7278475165367126, + "learning_rate": 2.731891996071878e-08, + "loss": 0.0922, + "step": 1185 + }, + { + "epoch": 2.9613947696139475, + "grad_norm": 0.8548258543014526, + "learning_rate": 1.6314326055440475e-08, + "loss": 0.0969, + "step": 1190 + }, + { + "epoch": 2.9738480697384806, + "grad_norm": 0.6877636313438416, + "learning_rate": 8.129677054693474e-09, + "loss": 0.0801, + "step": 1195 + }, + { + "epoch": 2.9863013698630136, + "grad_norm": 0.6995126605033875, + "learning_rate": 2.7665133177545708e-09, + "loss": 0.0829, + "step": 1200 + }, + { + "epoch": 2.9987546699875467, + "grad_norm": 0.7525503039360046, + "learning_rate": 2.2584419750504293e-10, + "loss": 0.0971, + "step": 1205 + }, + { + "epoch": 3.0, + "step": 1206, + "total_flos": 1.560016766921343e+18, + "train_loss": 0.4549595033580045, + "train_runtime": 671.8088, + "train_samples_per_second": 57.333, + "train_steps_per_second": 1.795 + } + ], + "logging_steps": 5, + "max_steps": 1206, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.560016766921343e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ed2214927677909f344bbb093de18de0acf7800 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/6_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c67a0964ff0b49bfa6b1af170483e0b1900c3571320a433049f52a9502a80a +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..64adbc1873844b580ffd8d39867844dae757b766 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 7_128_e3_3e-5 + results: [] +--- + + + +# 7_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2afb143c37f42d6a1fb6c12cbc90bafc0b483e17 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj", + "up_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbf864b80b46bbb989ac35f3f5536e9cf293b1a6 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c56a0fc351a52abc6f599d91df75741e6d8c6f8e907492e1b456082fd86d84cb +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1a9ee39e4f4ed7ba7ad7a78b98cf567fb89183a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.662813689058689e+18, + "train_loss": 0.4476705863227109, + "train_runtime": 712.3692, + "train_samples": 14097, + "train_samples_per_second": 59.367, + "train_steps_per_second": 1.857 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1a9ee39e4f4ed7ba7ad7a78b98cf567fb89183a9 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.662813689058689e+18, + "train_loss": 0.4476705863227109, + "train_runtime": 712.3692, + "train_samples": 14097, + "train_samples_per_second": 59.367, + "train_steps_per_second": 1.857 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..72779ebaedc80928a3882567fc94d2d06061d3e2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1891 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1323, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011337868480725623, + "grad_norm": 0.6278727650642395, + "learning_rate": 1.791044776119403e-06, + "loss": 1.648, + "step": 5 + }, + { + "epoch": 0.022675736961451247, + "grad_norm": 0.5820619463920593, + "learning_rate": 4.029850746268657e-06, + "loss": 1.5904, + "step": 10 + }, + { + "epoch": 0.034013605442176874, + "grad_norm": 0.5843833088874817, + "learning_rate": 6.268656716417911e-06, + "loss": 1.6254, + "step": 15 + }, + { + "epoch": 0.045351473922902494, + "grad_norm": 0.5164104700088501, + "learning_rate": 8.507462686567164e-06, + "loss": 1.5635, + "step": 20 + }, + { + "epoch": 0.05668934240362812, + "grad_norm": 0.500350832939148, + "learning_rate": 1.0746268656716418e-05, + "loss": 1.5889, + "step": 25 + }, + { + "epoch": 0.06802721088435375, + "grad_norm": 0.46628180146217346, + "learning_rate": 1.2985074626865672e-05, + "loss": 1.5585, + "step": 30 + }, + { + "epoch": 0.07936507936507936, + "grad_norm": 0.4542357325553894, + "learning_rate": 1.5223880597014927e-05, + "loss": 1.536, + "step": 35 + }, + { + "epoch": 0.09070294784580499, + "grad_norm": 0.4567652642726898, + "learning_rate": 1.746268656716418e-05, + "loss": 1.5387, + "step": 40 + }, + { + "epoch": 0.10204081632653061, + "grad_norm": 0.48667946457862854, + "learning_rate": 1.9701492537313435e-05, + "loss": 1.5212, + "step": 45 + }, + { + "epoch": 0.11337868480725624, + "grad_norm": 0.4661814868450165, + "learning_rate": 2.194029850746269e-05, + "loss": 1.4783, + "step": 50 + }, + { + "epoch": 0.12471655328798185, + "grad_norm": 0.5183005928993225, + "learning_rate": 2.417910447761194e-05, + "loss": 1.5203, + "step": 55 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 0.496567040681839, + "learning_rate": 2.6417910447761193e-05, + "loss": 1.4827, + "step": 60 + }, + { + "epoch": 0.1473922902494331, + "grad_norm": 0.5922465920448303, + "learning_rate": 2.8656716417910447e-05, + "loss": 1.4326, + "step": 65 + }, + { + "epoch": 0.15873015873015872, + "grad_norm": 0.5578612685203552, + "learning_rate": 2.9999812310137735e-05, + "loss": 1.4662, + "step": 70 + }, + { + "epoch": 0.17006802721088435, + "grad_norm": 0.5586779117584229, + "learning_rate": 2.9997700853128978e-05, + "loss": 1.3758, + "step": 75 + }, + { + "epoch": 0.18140589569160998, + "grad_norm": 0.5587146282196045, + "learning_rate": 2.99932436581301e-05, + "loss": 1.295, + "step": 80 + }, + { + "epoch": 0.1927437641723356, + "grad_norm": 0.61674565076828, + "learning_rate": 2.9986441422275408e-05, + "loss": 1.2846, + "step": 85 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.6117451190948486, + "learning_rate": 2.997729520947884e-05, + "loss": 1.369, + "step": 90 + }, + { + "epoch": 0.21541950113378686, + "grad_norm": 0.6470246911048889, + "learning_rate": 2.9965806450267583e-05, + "loss": 1.304, + "step": 95 + }, + { + "epoch": 0.22675736961451248, + "grad_norm": 0.651279091835022, + "learning_rate": 2.9951976941558322e-05, + "loss": 1.2224, + "step": 100 + }, + { + "epoch": 0.23809523809523808, + "grad_norm": 0.6942313313484192, + "learning_rate": 2.993580884637621e-05, + "loss": 1.3259, + "step": 105 + }, + { + "epoch": 0.2494331065759637, + "grad_norm": 0.5753467082977295, + "learning_rate": 2.9917304693516506e-05, + "loss": 1.1784, + "step": 110 + }, + { + "epoch": 0.26077097505668934, + "grad_norm": 0.7424439787864685, + "learning_rate": 2.989646737714912e-05, + "loss": 1.2668, + "step": 115 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 0.7898168563842773, + "learning_rate": 2.9873300156365898e-05, + "loss": 1.1993, + "step": 120 + }, + { + "epoch": 0.2834467120181406, + "grad_norm": 0.7269379496574402, + "learning_rate": 2.9847806654670893e-05, + "loss": 1.1695, + "step": 125 + }, + { + "epoch": 0.2947845804988662, + "grad_norm": 0.7303867936134338, + "learning_rate": 2.9819990859413647e-05, + "loss": 1.2471, + "step": 130 + }, + { + "epoch": 0.30612244897959184, + "grad_norm": 0.890695333480835, + "learning_rate": 2.9789857121165498e-05, + "loss": 1.0506, + "step": 135 + }, + { + "epoch": 0.31746031746031744, + "grad_norm": 0.8312426805496216, + "learning_rate": 2.9757410153039167e-05, + "loss": 1.1138, + "step": 140 + }, + { + "epoch": 0.3287981859410431, + "grad_norm": 0.8675890564918518, + "learning_rate": 2.9722655029951572e-05, + "loss": 1.1471, + "step": 145 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.0326390266418457, + "learning_rate": 2.9685597187830082e-05, + "loss": 1.1207, + "step": 150 + }, + { + "epoch": 0.35147392290249435, + "grad_norm": 1.0067397356033325, + "learning_rate": 2.9646242422762315e-05, + "loss": 1.0898, + "step": 155 + }, + { + "epoch": 0.36281179138321995, + "grad_norm": 0.9312989711761475, + "learning_rate": 2.9604596890089567e-05, + "loss": 1.038, + "step": 160 + }, + { + "epoch": 0.3741496598639456, + "grad_norm": 0.8867245316505432, + "learning_rate": 2.9560667103444098e-05, + "loss": 1.0115, + "step": 165 + }, + { + "epoch": 0.3854875283446712, + "grad_norm": 0.9552533626556396, + "learning_rate": 2.9514459933730355e-05, + "loss": 1.0097, + "step": 170 + }, + { + "epoch": 0.3968253968253968, + "grad_norm": 0.9180281758308411, + "learning_rate": 2.946598260805031e-05, + "loss": 1.0214, + "step": 175 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 0.9654207825660706, + "learning_rate": 2.9415242708573094e-05, + "loss": 0.9529, + "step": 180 + }, + { + "epoch": 0.41950113378684806, + "grad_norm": 1.018403172492981, + "learning_rate": 2.936224817134911e-05, + "loss": 0.9732, + "step": 185 + }, + { + "epoch": 0.4308390022675737, + "grad_norm": 0.9574979543685913, + "learning_rate": 2.930700728506876e-05, + "loss": 0.9778, + "step": 190 + }, + { + "epoch": 0.4421768707482993, + "grad_norm": 0.9441145658493042, + "learning_rate": 2.9249528689766073e-05, + "loss": 0.9687, + "step": 195 + }, + { + "epoch": 0.45351473922902497, + "grad_norm": 1.071329116821289, + "learning_rate": 2.918982137546731e-05, + "loss": 0.909, + "step": 200 + }, + { + "epoch": 0.46485260770975056, + "grad_norm": 1.111531138420105, + "learning_rate": 2.912789468078489e-05, + "loss": 0.9146, + "step": 205 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.1116747856140137, + "learning_rate": 2.9063758291456756e-05, + "loss": 0.9032, + "step": 210 + }, + { + "epoch": 0.4875283446712018, + "grad_norm": 1.0023722648620605, + "learning_rate": 2.8997422238831495e-05, + "loss": 0.8809, + "step": 215 + }, + { + "epoch": 0.4988662131519274, + "grad_norm": 0.9449451565742493, + "learning_rate": 2.8928896898299307e-05, + "loss": 0.8861, + "step": 220 + }, + { + "epoch": 0.5102040816326531, + "grad_norm": 1.153075933456421, + "learning_rate": 2.8858192987669303e-05, + "loss": 0.8589, + "step": 225 + }, + { + "epoch": 0.5215419501133787, + "grad_norm": 1.0422732830047607, + "learning_rate": 2.87853215654931e-05, + "loss": 0.8678, + "step": 230 + }, + { + "epoch": 0.5328798185941043, + "grad_norm": 1.1311839818954468, + "learning_rate": 2.8710294029335243e-05, + "loss": 0.824, + "step": 235 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 1.2948718070983887, + "learning_rate": 2.8633122113990518e-05, + "loss": 0.8294, + "step": 240 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 1.1338742971420288, + "learning_rate": 2.8553817889648575e-05, + "loss": 0.7902, + "step": 245 + }, + { + "epoch": 0.5668934240362812, + "grad_norm": 1.2702280282974243, + "learning_rate": 2.847239376000607e-05, + "loss": 0.7767, + "step": 250 + }, + { + "epoch": 0.5782312925170068, + "grad_norm": 1.1065067052841187, + "learning_rate": 2.838886246032663e-05, + "loss": 0.8159, + "step": 255 + }, + { + "epoch": 0.5895691609977324, + "grad_norm": 1.1018950939178467, + "learning_rate": 2.8303237055448984e-05, + "loss": 0.8371, + "step": 260 + }, + { + "epoch": 0.6009070294784581, + "grad_norm": 1.313409447669983, + "learning_rate": 2.821553093774355e-05, + "loss": 0.8397, + "step": 265 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 1.1373920440673828, + "learning_rate": 2.8125757825017755e-05, + "loss": 0.735, + "step": 270 + }, + { + "epoch": 0.6235827664399093, + "grad_norm": 1.2059340476989746, + "learning_rate": 2.803393175837051e-05, + "loss": 0.7309, + "step": 275 + }, + { + "epoch": 0.6349206349206349, + "grad_norm": 1.1052274703979492, + "learning_rate": 2.7940067099996068e-05, + "loss": 0.7224, + "step": 280 + }, + { + "epoch": 0.6462585034013606, + "grad_norm": 1.22477388381958, + "learning_rate": 2.7844178530937693e-05, + "loss": 0.7435, + "step": 285 + }, + { + "epoch": 0.6575963718820862, + "grad_norm": 1.1236454248428345, + "learning_rate": 2.774628104879144e-05, + "loss": 0.7823, + "step": 290 + }, + { + "epoch": 0.6689342403628118, + "grad_norm": 1.2116868495941162, + "learning_rate": 2.7646389965360455e-05, + "loss": 0.7249, + "step": 295 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 1.2217785120010376, + "learning_rate": 2.7544520904260087e-05, + "loss": 0.7106, + "step": 300 + }, + { + "epoch": 0.691609977324263, + "grad_norm": 1.2648394107818604, + "learning_rate": 2.7440689798474262e-05, + "loss": 0.6877, + "step": 305 + }, + { + "epoch": 0.7029478458049887, + "grad_norm": 1.2114914655685425, + "learning_rate": 2.733491288786347e-05, + "loss": 0.6419, + "step": 310 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 1.1324774026870728, + "learning_rate": 2.7227206716624716e-05, + "loss": 0.6994, + "step": 315 + }, + { + "epoch": 0.7256235827664399, + "grad_norm": 1.1974220275878906, + "learning_rate": 2.7117588130703936e-05, + "loss": 0.6474, + "step": 320 + }, + { + "epoch": 0.7369614512471655, + "grad_norm": 1.2385644912719727, + "learning_rate": 2.7006074275161158e-05, + "loss": 0.709, + "step": 325 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 1.2581818103790283, + "learning_rate": 2.689268259148891e-05, + "loss": 0.6762, + "step": 330 + }, + { + "epoch": 0.7596371882086168, + "grad_norm": 1.2627633810043335, + "learning_rate": 2.677743081488425e-05, + "loss": 0.6647, + "step": 335 + }, + { + "epoch": 0.7709750566893424, + "grad_norm": 1.2939047813415527, + "learning_rate": 2.6660336971474892e-05, + "loss": 0.6278, + "step": 340 + }, + { + "epoch": 0.782312925170068, + "grad_norm": 1.1734991073608398, + "learning_rate": 2.654141937549976e-05, + "loss": 0.591, + "step": 345 + }, + { + "epoch": 0.7936507936507936, + "grad_norm": 1.4242619276046753, + "learning_rate": 2.642069662644456e-05, + "loss": 0.5928, + "step": 350 + }, + { + "epoch": 0.8049886621315193, + "grad_norm": 1.2074871063232422, + "learning_rate": 2.629818760613268e-05, + "loss": 0.5861, + "step": 355 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 1.685478925704956, + "learning_rate": 2.6173911475771956e-05, + "loss": 0.6418, + "step": 360 + }, + { + "epoch": 0.8276643990929705, + "grad_norm": 1.1117711067199707, + "learning_rate": 2.6047887672957725e-05, + "loss": 0.6255, + "step": 365 + }, + { + "epoch": 0.8390022675736961, + "grad_norm": 1.2696367502212524, + "learning_rate": 2.5920135908632678e-05, + "loss": 0.568, + "step": 370 + }, + { + "epoch": 0.8503401360544217, + "grad_norm": 1.3452447652816772, + "learning_rate": 2.5790676164003916e-05, + "loss": 0.5836, + "step": 375 + }, + { + "epoch": 0.8616780045351474, + "grad_norm": 1.3422842025756836, + "learning_rate": 2.5659528687417785e-05, + "loss": 0.5811, + "step": 380 + }, + { + "epoch": 0.873015873015873, + "grad_norm": 1.2750893831253052, + "learning_rate": 2.552671399119287e-05, + "loss": 0.5516, + "step": 385 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 1.3484853506088257, + "learning_rate": 2.5392252848411788e-05, + "loss": 0.631, + "step": 390 + }, + { + "epoch": 0.8956916099773242, + "grad_norm": 1.1090818643569946, + "learning_rate": 2.525616628967207e-05, + "loss": 0.6079, + "step": 395 + }, + { + "epoch": 0.9070294784580499, + "grad_norm": 1.2315822839736938, + "learning_rate": 2.511847559979691e-05, + "loss": 0.5415, + "step": 400 + }, + { + "epoch": 0.9183673469387755, + "grad_norm": 1.1082944869995117, + "learning_rate": 2.4979202314506025e-05, + "loss": 0.542, + "step": 405 + }, + { + "epoch": 0.9297052154195011, + "grad_norm": 1.4752061367034912, + "learning_rate": 2.483836821704737e-05, + "loss": 0.5488, + "step": 410 + }, + { + "epoch": 0.9410430839002267, + "grad_norm": 1.4213231801986694, + "learning_rate": 2.4695995334790066e-05, + "loss": 0.5605, + "step": 415 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 1.369572639465332, + "learning_rate": 2.455210593577919e-05, + "loss": 0.4866, + "step": 420 + }, + { + "epoch": 0.963718820861678, + "grad_norm": 1.2265255451202393, + "learning_rate": 2.440672252525291e-05, + "loss": 0.4928, + "step": 425 + }, + { + "epoch": 0.9750566893424036, + "grad_norm": 1.3354957103729248, + "learning_rate": 2.425986784212252e-05, + "loss": 0.5097, + "step": 430 + }, + { + "epoch": 0.9863945578231292, + "grad_norm": 1.1898748874664307, + "learning_rate": 2.4111564855415896e-05, + "loss": 0.5014, + "step": 435 + }, + { + "epoch": 0.9977324263038548, + "grad_norm": 1.3913853168487549, + "learning_rate": 2.3961836760685047e-05, + "loss": 0.5225, + "step": 440 + }, + { + "epoch": 1.0090702947845804, + "grad_norm": 1.3400938510894775, + "learning_rate": 2.3810706976378136e-05, + "loss": 0.4364, + "step": 445 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 1.2621558904647827, + "learning_rate": 2.3658199140176696e-05, + "loss": 0.4392, + "step": 450 + }, + { + "epoch": 1.0317460317460316, + "grad_norm": 1.176568865776062, + "learning_rate": 2.3504337105298534e-05, + "loss": 0.3912, + "step": 455 + }, + { + "epoch": 1.0430839002267573, + "grad_norm": 1.4495505094528198, + "learning_rate": 2.3349144936766957e-05, + "loss": 0.3391, + "step": 460 + }, + { + "epoch": 1.054421768707483, + "grad_norm": 1.2325565814971924, + "learning_rate": 2.3192646907646816e-05, + "loss": 0.4116, + "step": 465 + }, + { + "epoch": 1.0657596371882085, + "grad_norm": 1.4485830068588257, + "learning_rate": 2.3034867495248064e-05, + "loss": 0.4194, + "step": 470 + }, + { + "epoch": 1.0770975056689343, + "grad_norm": 1.394208550453186, + "learning_rate": 2.2875831377297318e-05, + "loss": 0.4455, + "step": 475 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 1.2572530508041382, + "learning_rate": 2.2715563428078148e-05, + "loss": 0.3763, + "step": 480 + }, + { + "epoch": 1.0997732426303855, + "grad_norm": 1.1372933387756348, + "learning_rate": 2.2554088714540523e-05, + "loss": 0.3951, + "step": 485 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 1.2195903062820435, + "learning_rate": 2.239143249238021e-05, + "loss": 0.3922, + "step": 490 + }, + { + "epoch": 1.1224489795918366, + "grad_norm": 1.3315958976745605, + "learning_rate": 2.2227620202088622e-05, + "loss": 0.3336, + "step": 495 + }, + { + "epoch": 1.1337868480725624, + "grad_norm": 1.230363368988037, + "learning_rate": 2.2062677464973737e-05, + "loss": 0.3655, + "step": 500 + }, + { + "epoch": 1.145124716553288, + "grad_norm": 1.1458308696746826, + "learning_rate": 2.1896630079152774e-05, + "loss": 0.3748, + "step": 505 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 1.5523139238357544, + "learning_rate": 2.1729504015517203e-05, + "loss": 0.386, + "step": 510 + }, + { + "epoch": 1.1678004535147393, + "grad_norm": 1.2879523038864136, + "learning_rate": 2.1561325413670713e-05, + "loss": 0.3586, + "step": 515 + }, + { + "epoch": 1.179138321995465, + "grad_norm": 1.1309213638305664, + "learning_rate": 2.139212057784082e-05, + "loss": 0.4295, + "step": 520 + }, + { + "epoch": 1.1904761904761905, + "grad_norm": 1.2958279848098755, + "learning_rate": 2.1221915972764717e-05, + "loss": 0.3517, + "step": 525 + }, + { + "epoch": 1.2018140589569162, + "grad_norm": 1.243368148803711, + "learning_rate": 2.105073821955001e-05, + "loss": 0.3432, + "step": 530 + }, + { + "epoch": 1.2131519274376417, + "grad_norm": 1.3680540323257446, + "learning_rate": 2.0878614091510995e-05, + "loss": 0.3908, + "step": 535 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 1.358788251876831, + "learning_rate": 2.0705570509981158e-05, + "loss": 0.3698, + "step": 540 + }, + { + "epoch": 1.235827664399093, + "grad_norm": 1.328899621963501, + "learning_rate": 2.0531634540102496e-05, + "loss": 0.3104, + "step": 545 + }, + { + "epoch": 1.2471655328798186, + "grad_norm": 1.208894968032837, + "learning_rate": 2.035683338659234e-05, + "loss": 0.3616, + "step": 550 + }, + { + "epoch": 1.2585034013605443, + "grad_norm": 1.2764071226119995, + "learning_rate": 2.0181194389488375e-05, + "loss": 0.356, + "step": 555 + }, + { + "epoch": 1.2698412698412698, + "grad_norm": 1.2002720832824707, + "learning_rate": 2.0004745019872472e-05, + "loss": 0.2987, + "step": 560 + }, + { + "epoch": 1.2811791383219955, + "grad_norm": 1.2015793323516846, + "learning_rate": 1.982751287557405e-05, + "loss": 0.3546, + "step": 565 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 1.2007602453231812, + "learning_rate": 1.9649525676853553e-05, + "loss": 0.3833, + "step": 570 + }, + { + "epoch": 1.3038548752834467, + "grad_norm": 1.2693496942520142, + "learning_rate": 1.947081126206687e-05, + "loss": 0.3057, + "step": 575 + }, + { + "epoch": 1.3151927437641724, + "grad_norm": 1.3291699886322021, + "learning_rate": 1.929139758331122e-05, + "loss": 0.3683, + "step": 580 + }, + { + "epoch": 1.3265306122448979, + "grad_norm": 1.2622551918029785, + "learning_rate": 1.9111312702053233e-05, + "loss": 0.3619, + "step": 585 + }, + { + "epoch": 1.3378684807256236, + "grad_norm": 1.4458867311477661, + "learning_rate": 1.893058478473999e-05, + "loss": 0.3304, + "step": 590 + }, + { + "epoch": 1.3492063492063493, + "grad_norm": 1.2265455722808838, + "learning_rate": 1.87492420983936e-05, + "loss": 0.2732, + "step": 595 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 1.4112454652786255, + "learning_rate": 1.8567313006190042e-05, + "loss": 0.3357, + "step": 600 + }, + { + "epoch": 1.3718820861678005, + "grad_norm": 1.195788860321045, + "learning_rate": 1.838482596302299e-05, + "loss": 0.3314, + "step": 605 + }, + { + "epoch": 1.383219954648526, + "grad_norm": 1.6853605508804321, + "learning_rate": 1.8201809511053263e-05, + "loss": 0.3463, + "step": 610 + }, + { + "epoch": 1.3945578231292517, + "grad_norm": 1.3292146921157837, + "learning_rate": 1.8018292275244693e-05, + "loss": 0.3068, + "step": 615 + }, + { + "epoch": 1.4058956916099774, + "grad_norm": 1.3174530267715454, + "learning_rate": 1.7834302958886926e-05, + "loss": 0.3196, + "step": 620 + }, + { + "epoch": 1.417233560090703, + "grad_norm": 1.4327112436294556, + "learning_rate": 1.764987033910611e-05, + "loss": 0.3065, + "step": 625 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 1.3234883546829224, + "learning_rate": 1.7465023262363904e-05, + "loss": 0.3247, + "step": 630 + }, + { + "epoch": 1.439909297052154, + "grad_norm": 1.364027500152588, + "learning_rate": 1.727979063994576e-05, + "loss": 0.318, + "step": 635 + }, + { + "epoch": 1.4512471655328798, + "grad_norm": 1.2583746910095215, + "learning_rate": 1.7094201443438955e-05, + "loss": 0.2539, + "step": 640 + }, + { + "epoch": 1.4625850340136055, + "grad_norm": 1.3222655057907104, + "learning_rate": 1.6908284700201295e-05, + "loss": 0.2731, + "step": 645 + }, + { + "epoch": 1.473922902494331, + "grad_norm": 1.190109372138977, + "learning_rate": 1.6722069488821012e-05, + "loss": 0.2569, + "step": 650 + }, + { + "epoch": 1.4852607709750567, + "grad_norm": 1.2756363153457642, + "learning_rate": 1.6535584934568708e-05, + "loss": 0.2742, + "step": 655 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 1.409051537513733, + "learning_rate": 1.6348860204841948e-05, + "loss": 0.2457, + "step": 660 + }, + { + "epoch": 1.507936507936508, + "grad_norm": 1.2327147722244263, + "learning_rate": 1.6161924504603325e-05, + "loss": 0.302, + "step": 665 + }, + { + "epoch": 1.5192743764172336, + "grad_norm": 1.219650387763977, + "learning_rate": 1.597480707181257e-05, + "loss": 0.3021, + "step": 670 + }, + { + "epoch": 1.5306122448979593, + "grad_norm": 1.439704418182373, + "learning_rate": 1.5787537172853582e-05, + "loss": 0.2773, + "step": 675 + }, + { + "epoch": 1.5419501133786848, + "grad_norm": 1.119301676750183, + "learning_rate": 1.5600144097956955e-05, + "loss": 0.2565, + "step": 680 + }, + { + "epoch": 1.5532879818594103, + "grad_norm": 1.295109510421753, + "learning_rate": 1.5412657156618802e-05, + "loss": 0.2931, + "step": 685 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 1.1821118593215942, + "learning_rate": 1.5225105673016569e-05, + "loss": 0.2378, + "step": 690 + }, + { + "epoch": 1.5759637188208617, + "grad_norm": 1.373043417930603, + "learning_rate": 1.503751898142251e-05, + "loss": 0.2487, + "step": 695 + }, + { + "epoch": 1.5873015873015874, + "grad_norm": 1.630577802658081, + "learning_rate": 1.484992642161565e-05, + "loss": 0.2632, + "step": 700 + }, + { + "epoch": 1.598639455782313, + "grad_norm": 1.1399919986724854, + "learning_rate": 1.466235733429285e-05, + "loss": 0.2427, + "step": 705 + }, + { + "epoch": 1.6099773242630384, + "grad_norm": 1.1558167934417725, + "learning_rate": 1.44748410564797e-05, + "loss": 0.2528, + "step": 710 + }, + { + "epoch": 1.6213151927437641, + "grad_norm": 1.6343483924865723, + "learning_rate": 1.4287406916942067e-05, + "loss": 0.2807, + "step": 715 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 1.1404987573623657, + "learning_rate": 1.410008423159883e-05, + "loss": 0.2158, + "step": 720 + }, + { + "epoch": 1.6439909297052155, + "grad_norm": 1.2889701128005981, + "learning_rate": 1.3912902298936718e-05, + "loss": 0.2372, + "step": 725 + }, + { + "epoch": 1.655328798185941, + "grad_norm": 1.2436479330062866, + "learning_rate": 1.3725890395427832e-05, + "loss": 0.2543, + "step": 730 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 1.0901046991348267, + "learning_rate": 1.3539077770950602e-05, + "loss": 0.2467, + "step": 735 + }, + { + "epoch": 1.6780045351473922, + "grad_norm": 1.4698289632797241, + "learning_rate": 1.335249364421491e-05, + "loss": 0.2189, + "step": 740 + }, + { + "epoch": 1.689342403628118, + "grad_norm": 1.2879670858383179, + "learning_rate": 1.316616719819212e-05, + "loss": 0.1968, + "step": 745 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 1.1305407285690308, + "learning_rate": 1.298012757555065e-05, + "loss": 0.1876, + "step": 750 + }, + { + "epoch": 1.7120181405895691, + "grad_norm": 1.3351932764053345, + "learning_rate": 1.279440387409788e-05, + "loss": 0.2227, + "step": 755 + }, + { + "epoch": 1.7233560090702946, + "grad_norm": 1.2829217910766602, + "learning_rate": 1.2609025142229049e-05, + "loss": 0.2217, + "step": 760 + }, + { + "epoch": 1.7346938775510203, + "grad_norm": 1.1921294927597046, + "learning_rate": 1.2424020374383914e-05, + "loss": 0.1939, + "step": 765 + }, + { + "epoch": 1.746031746031746, + "grad_norm": 1.208138346672058, + "learning_rate": 1.2239418506511836e-05, + "loss": 0.2001, + "step": 770 + }, + { + "epoch": 1.7573696145124718, + "grad_norm": 1.4924089908599854, + "learning_rate": 1.2055248411545986e-05, + "loss": 0.2538, + "step": 775 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 1.2203882932662964, + "learning_rate": 1.1871538894887443e-05, + "loss": 0.2112, + "step": 780 + }, + { + "epoch": 1.780045351473923, + "grad_norm": 1.3454453945159912, + "learning_rate": 1.1688318689899852e-05, + "loss": 0.194, + "step": 785 + }, + { + "epoch": 1.7913832199546484, + "grad_norm": 1.2197067737579346, + "learning_rate": 1.150561645341532e-05, + "loss": 0.1974, + "step": 790 + }, + { + "epoch": 1.8027210884353742, + "grad_norm": 1.2827750444412231, + "learning_rate": 1.1323460761252323e-05, + "loss": 0.1813, + "step": 795 + }, + { + "epoch": 1.8140589569160999, + "grad_norm": 1.1169768571853638, + "learning_rate": 1.114188010374623e-05, + "loss": 0.2017, + "step": 800 + }, + { + "epoch": 1.8253968253968254, + "grad_norm": 1.172804355621338, + "learning_rate": 1.0960902881293259e-05, + "loss": 0.1868, + "step": 805 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 1.345308780670166, + "learning_rate": 1.0780557399908465e-05, + "loss": 0.2249, + "step": 810 + }, + { + "epoch": 1.8480725623582765, + "grad_norm": 1.461124300956726, + "learning_rate": 1.0600871866798486e-05, + "loss": 0.2136, + "step": 815 + }, + { + "epoch": 1.8594104308390023, + "grad_norm": 1.221682071685791, + "learning_rate": 1.0421874385949744e-05, + "loss": 0.2052, + "step": 820 + }, + { + "epoch": 1.870748299319728, + "grad_norm": 1.187596321105957, + "learning_rate": 1.0243592953732828e-05, + "loss": 0.1768, + "step": 825 + }, + { + "epoch": 1.8820861678004537, + "grad_norm": 1.1603496074676514, + "learning_rate": 1.0066055454523651e-05, + "loss": 0.1961, + "step": 830 + }, + { + "epoch": 1.8934240362811792, + "grad_norm": 1.1339302062988281, + "learning_rate": 9.889289656342179e-06, + "loss": 0.1633, + "step": 835 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 1.185678482055664, + "learning_rate": 9.713323206509292e-06, + "loss": 0.1947, + "step": 840 + }, + { + "epoch": 1.9160997732426304, + "grad_norm": 1.265136480331421, + "learning_rate": 9.538183627322604e-06, + "loss": 0.1819, + "step": 845 + }, + { + "epoch": 1.927437641723356, + "grad_norm": 1.3096390962600708, + "learning_rate": 9.363898311751779e-06, + "loss": 0.1611, + "step": 850 + }, + { + "epoch": 1.9387755102040818, + "grad_norm": 1.0483171939849854, + "learning_rate": 9.190494519154093e-06, + "loss": 0.1597, + "step": 855 + }, + { + "epoch": 1.9501133786848073, + "grad_norm": 1.276808500289917, + "learning_rate": 9.017999371010896e-06, + "loss": 0.186, + "step": 860 + }, + { + "epoch": 1.9614512471655328, + "grad_norm": 1.1813976764678955, + "learning_rate": 8.846439846685619e-06, + "loss": 0.1535, + "step": 865 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 1.1090126037597656, + "learning_rate": 8.67584277920406e-06, + "loss": 0.1872, + "step": 870 + }, + { + "epoch": 1.9841269841269842, + "grad_norm": 1.0467617511749268, + "learning_rate": 8.506234851057494e-06, + "loss": 0.1711, + "step": 875 + }, + { + "epoch": 1.99546485260771, + "grad_norm": 1.1972156763076782, + "learning_rate": 8.33764259002937e-06, + "loss": 0.1487, + "step": 880 + }, + { + "epoch": 2.006802721088435, + "grad_norm": 0.9210137724876404, + "learning_rate": 8.170092365046194e-06, + "loss": 0.1534, + "step": 885 + }, + { + "epoch": 2.018140589569161, + "grad_norm": 1.2337560653686523, + "learning_rate": 8.003610382053252e-06, + "loss": 0.1267, + "step": 890 + }, + { + "epoch": 2.0294784580498866, + "grad_norm": 1.2393581867218018, + "learning_rate": 7.83822267991583e-06, + "loss": 0.1157, + "step": 895 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 1.0719647407531738, + "learning_rate": 7.673955126346589e-06, + "loss": 0.1619, + "step": 900 + }, + { + "epoch": 2.052154195011338, + "grad_norm": 1.1385760307312012, + "learning_rate": 7.5108334138596335e-06, + "loss": 0.1396, + "step": 905 + }, + { + "epoch": 2.0634920634920633, + "grad_norm": 1.0354576110839844, + "learning_rate": 7.3488830557520815e-06, + "loss": 0.1574, + "step": 910 + }, + { + "epoch": 2.074829931972789, + "grad_norm": 1.113344430923462, + "learning_rate": 7.188129382113624e-06, + "loss": 0.1362, + "step": 915 + }, + { + "epoch": 2.0861678004535147, + "grad_norm": 0.918411135673523, + "learning_rate": 7.028597535864695e-06, + "loss": 0.1313, + "step": 920 + }, + { + "epoch": 2.0975056689342404, + "grad_norm": 0.9371508359909058, + "learning_rate": 6.870312468823965e-06, + "loss": 0.1333, + "step": 925 + }, + { + "epoch": 2.108843537414966, + "grad_norm": 1.1416962146759033, + "learning_rate": 6.713298937805755e-06, + "loss": 0.1254, + "step": 930 + }, + { + "epoch": 2.120181405895692, + "grad_norm": 1.434099793434143, + "learning_rate": 6.557581500747856e-06, + "loss": 0.114, + "step": 935 + }, + { + "epoch": 2.131519274376417, + "grad_norm": 1.327864170074463, + "learning_rate": 6.403184512870544e-06, + "loss": 0.1343, + "step": 940 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 1.1591793298721313, + "learning_rate": 6.25013212286724e-06, + "loss": 0.1212, + "step": 945 + }, + { + "epoch": 2.1541950113378685, + "grad_norm": 1.2772685289382935, + "learning_rate": 6.098448269127522e-06, + "loss": 0.1227, + "step": 950 + }, + { + "epoch": 2.165532879818594, + "grad_norm": 1.8926531076431274, + "learning_rate": 5.948156675992982e-06, + "loss": 0.1166, + "step": 955 + }, + { + "epoch": 2.17687074829932, + "grad_norm": 1.0052694082260132, + "learning_rate": 5.799280850046603e-06, + "loss": 0.1192, + "step": 960 + }, + { + "epoch": 2.188208616780045, + "grad_norm": 1.0480914115905762, + "learning_rate": 5.651844076436165e-06, + "loss": 0.1218, + "step": 965 + }, + { + "epoch": 2.199546485260771, + "grad_norm": 0.8825601935386658, + "learning_rate": 5.505869415232299e-06, + "loss": 0.1311, + "step": 970 + }, + { + "epoch": 2.2108843537414966, + "grad_norm": 1.2157886028289795, + "learning_rate": 5.361379697821742e-06, + "loss": 0.1276, + "step": 975 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.9576120972633362, + "learning_rate": 5.218397523336375e-06, + "loss": 0.1337, + "step": 980 + }, + { + "epoch": 2.233560090702948, + "grad_norm": 0.9250437021255493, + "learning_rate": 5.07694525511853e-06, + "loss": 0.1076, + "step": 985 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 0.9240543246269226, + "learning_rate": 4.937045017223265e-06, + "loss": 0.1153, + "step": 990 + }, + { + "epoch": 2.256235827664399, + "grad_norm": 0.9547942876815796, + "learning_rate": 4.798718690957999e-06, + "loss": 0.134, + "step": 995 + }, + { + "epoch": 2.2675736961451247, + "grad_norm": 0.9011281728744507, + "learning_rate": 4.6619879114601255e-06, + "loss": 0.0987, + "step": 1000 + }, + { + "epoch": 2.2789115646258504, + "grad_norm": 0.950387716293335, + "learning_rate": 4.526874064313131e-06, + "loss": 0.1143, + "step": 1005 + }, + { + "epoch": 2.290249433106576, + "grad_norm": 1.0265562534332275, + "learning_rate": 4.393398282201788e-06, + "loss": 0.099, + "step": 1010 + }, + { + "epoch": 2.3015873015873014, + "grad_norm": 0.9019437432289124, + "learning_rate": 4.261581441606824e-06, + "loss": 0.1231, + "step": 1015 + }, + { + "epoch": 2.312925170068027, + "grad_norm": 0.9016369581222534, + "learning_rate": 4.1314441595397156e-06, + "loss": 0.1108, + "step": 1020 + }, + { + "epoch": 2.324263038548753, + "grad_norm": 0.9470375776290894, + "learning_rate": 4.00300679031808e-06, + "loss": 0.1058, + "step": 1025 + }, + { + "epoch": 2.3356009070294785, + "grad_norm": 1.4025534391403198, + "learning_rate": 3.876289422382078e-06, + "loss": 0.1236, + "step": 1030 + }, + { + "epoch": 2.3469387755102042, + "grad_norm": 1.1743429899215698, + "learning_rate": 3.7513118751524934e-06, + "loss": 0.1045, + "step": 1035 + }, + { + "epoch": 2.35827664399093, + "grad_norm": 0.9945691227912903, + "learning_rate": 3.628093695930836e-06, + "loss": 0.1258, + "step": 1040 + }, + { + "epoch": 2.369614512471655, + "grad_norm": 0.8175275921821594, + "learning_rate": 3.50665415684201e-06, + "loss": 0.1192, + "step": 1045 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 1.185677409172058, + "learning_rate": 3.3870122518200134e-06, + "loss": 0.0991, + "step": 1050 + }, + { + "epoch": 2.3922902494331066, + "grad_norm": 1.0005359649658203, + "learning_rate": 3.269186693637208e-06, + "loss": 0.115, + "step": 1055 + }, + { + "epoch": 2.4036281179138324, + "grad_norm": 0.9162901639938354, + "learning_rate": 3.153195910977475e-06, + "loss": 0.0959, + "step": 1060 + }, + { + "epoch": 2.4149659863945576, + "grad_norm": 1.0694159269332886, + "learning_rate": 3.039058045553872e-06, + "loss": 0.1177, + "step": 1065 + }, + { + "epoch": 2.4263038548752833, + "grad_norm": 0.9803506135940552, + "learning_rate": 2.9267909492711447e-06, + "loss": 0.1048, + "step": 1070 + }, + { + "epoch": 2.437641723356009, + "grad_norm": 0.8046582937240601, + "learning_rate": 2.816412181433574e-06, + "loss": 0.1128, + "step": 1075 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.9108407497406006, + "learning_rate": 2.7079390059985835e-06, + "loss": 0.1174, + "step": 1080 + }, + { + "epoch": 2.4603174603174605, + "grad_norm": 0.9451397657394409, + "learning_rate": 2.6013883888765533e-06, + "loss": 0.1235, + "step": 1085 + }, + { + "epoch": 2.471655328798186, + "grad_norm": 0.9862541556358337, + "learning_rate": 2.4967769952772284e-06, + "loss": 0.0919, + "step": 1090 + }, + { + "epoch": 2.4829931972789114, + "grad_norm": 0.9681780934333801, + "learning_rate": 2.394121187103184e-06, + "loss": 0.1, + "step": 1095 + }, + { + "epoch": 2.494331065759637, + "grad_norm": 0.7209926843643188, + "learning_rate": 2.293437020390701e-06, + "loss": 0.0915, + "step": 1100 + }, + { + "epoch": 2.505668934240363, + "grad_norm": 0.8104588389396667, + "learning_rate": 2.194740242798528e-06, + "loss": 0.1168, + "step": 1105 + }, + { + "epoch": 2.5170068027210886, + "grad_norm": 0.9460147023200989, + "learning_rate": 2.0980462911448028e-06, + "loss": 0.0989, + "step": 1110 + }, + { + "epoch": 2.528344671201814, + "grad_norm": 0.7768290638923645, + "learning_rate": 2.003370288992666e-06, + "loss": 0.0939, + "step": 1115 + }, + { + "epoch": 2.5396825396825395, + "grad_norm": 0.8534517288208008, + "learning_rate": 1.9107270442848305e-06, + "loss": 0.1095, + "step": 1120 + }, + { + "epoch": 2.5510204081632653, + "grad_norm": 0.7945001721382141, + "learning_rate": 1.8201310470275174e-06, + "loss": 0.0925, + "step": 1125 + }, + { + "epoch": 2.562358276643991, + "grad_norm": 0.9445656538009644, + "learning_rate": 1.7315964670241164e-06, + "loss": 0.0847, + "step": 1130 + }, + { + "epoch": 2.5736961451247167, + "grad_norm": 0.7719889879226685, + "learning_rate": 1.6451371516589636e-06, + "loss": 0.1146, + "step": 1135 + }, + { + "epoch": 2.5850340136054424, + "grad_norm": 1.2266815900802612, + "learning_rate": 1.5607666237314927e-06, + "loss": 0.1052, + "step": 1140 + }, + { + "epoch": 2.5963718820861676, + "grad_norm": 0.8976446986198425, + "learning_rate": 1.4784980793411985e-06, + "loss": 0.0969, + "step": 1145 + }, + { + "epoch": 2.6077097505668934, + "grad_norm": 0.8700320720672607, + "learning_rate": 1.3983443858236677e-06, + "loss": 0.1165, + "step": 1150 + }, + { + "epoch": 2.619047619047619, + "grad_norm": 0.8452789187431335, + "learning_rate": 1.3203180797380583e-06, + "loss": 0.119, + "step": 1155 + }, + { + "epoch": 2.630385487528345, + "grad_norm": 0.7109794020652771, + "learning_rate": 1.2444313649062877e-06, + "loss": 0.1073, + "step": 1160 + }, + { + "epoch": 2.64172335600907, + "grad_norm": 0.8362886309623718, + "learning_rate": 1.1706961105042835e-06, + "loss": 0.1102, + "step": 1165 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7606858611106873, + "learning_rate": 1.099123849205565e-06, + "loss": 0.0859, + "step": 1170 + }, + { + "epoch": 2.6643990929705215, + "grad_norm": 0.8384146690368652, + "learning_rate": 1.029725775377452e-06, + "loss": 0.0798, + "step": 1175 + }, + { + "epoch": 2.675736961451247, + "grad_norm": 0.7494062781333923, + "learning_rate": 9.625127433302082e-07, + "loss": 0.0991, + "step": 1180 + }, + { + "epoch": 2.687074829931973, + "grad_norm": 0.770908772945404, + "learning_rate": 8.974952656193403e-07, + "loss": 0.089, + "step": 1185 + }, + { + "epoch": 2.6984126984126986, + "grad_norm": 0.7784945368766785, + "learning_rate": 8.346835114013713e-07, + "loss": 0.0971, + "step": 1190 + }, + { + "epoch": 2.7097505668934243, + "grad_norm": 0.9724911451339722, + "learning_rate": 7.740873048433212e-07, + "loss": 0.094, + "step": 1195 + }, + { + "epoch": 2.7210884353741496, + "grad_norm": 0.7335108518600464, + "learning_rate": 7.157161235861404e-07, + "loss": 0.109, + "step": 1200 + }, + { + "epoch": 2.7324263038548753, + "grad_norm": 0.8774988651275635, + "learning_rate": 6.595790972623505e-07, + "loss": 0.1155, + "step": 1205 + }, + { + "epoch": 2.743764172335601, + "grad_norm": 0.8004282116889954, + "learning_rate": 6.056850060680985e-07, + "loss": 0.0887, + "step": 1210 + }, + { + "epoch": 2.7551020408163263, + "grad_norm": 0.8016287088394165, + "learning_rate": 5.540422793898881e-07, + "loss": 0.0956, + "step": 1215 + }, + { + "epoch": 2.766439909297052, + "grad_norm": 0.7795893549919128, + "learning_rate": 5.046589944861679e-07, + "loss": 0.1035, + "step": 1220 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.7347130179405212, + "learning_rate": 4.5754287522398575e-07, + "loss": 0.0816, + "step": 1225 + }, + { + "epoch": 2.7891156462585034, + "grad_norm": 0.7891128659248352, + "learning_rate": 4.127012908709427e-07, + "loss": 0.1152, + "step": 1230 + }, + { + "epoch": 2.800453514739229, + "grad_norm": 0.7946211099624634, + "learning_rate": 3.70141254942572e-07, + "loss": 0.1065, + "step": 1235 + }, + { + "epoch": 2.811791383219955, + "grad_norm": 0.8520053625106812, + "learning_rate": 3.298694241053901e-07, + "loss": 0.0831, + "step": 1240 + }, + { + "epoch": 2.8231292517006805, + "grad_norm": 0.9317479133605957, + "learning_rate": 2.9189209713575914e-07, + "loss": 0.0939, + "step": 1245 + }, + { + "epoch": 2.834467120181406, + "grad_norm": 0.8101786971092224, + "learning_rate": 2.5621521393470017e-07, + "loss": 0.085, + "step": 1250 + }, + { + "epoch": 2.8458049886621315, + "grad_norm": 0.8038177490234375, + "learning_rate": 2.2284435459885954e-07, + "loss": 0.096, + "step": 1255 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6774724721908569, + "learning_rate": 1.9178473854775558e-07, + "loss": 0.0912, + "step": 1260 + }, + { + "epoch": 2.868480725623583, + "grad_norm": 0.7410919666290283, + "learning_rate": 1.630412237074147e-07, + "loss": 0.0913, + "step": 1265 + }, + { + "epoch": 2.879818594104308, + "grad_norm": 0.730821430683136, + "learning_rate": 1.3661830575056765e-07, + "loss": 0.0881, + "step": 1270 + }, + { + "epoch": 2.891156462585034, + "grad_norm": 0.8278365135192871, + "learning_rate": 1.1252011739349366e-07, + "loss": 0.0935, + "step": 1275 + }, + { + "epoch": 2.9024943310657596, + "grad_norm": 0.8545149564743042, + "learning_rate": 9.075042774963405e-08, + "loss": 0.0969, + "step": 1280 + }, + { + "epoch": 2.9138321995464853, + "grad_norm": 0.8369249105453491, + "learning_rate": 7.131264174008722e-08, + "loss": 0.0858, + "step": 1285 + }, + { + "epoch": 2.925170068027211, + "grad_norm": 0.8694900870323181, + "learning_rate": 5.4209799561049656e-08, + "loss": 0.0934, + "step": 1290 + }, + { + "epoch": 2.9365079365079367, + "grad_norm": 0.7598402500152588, + "learning_rate": 3.9444576208311214e-08, + "loss": 0.1032, + "step": 1295 + }, + { + "epoch": 2.947845804988662, + "grad_norm": 0.8265975713729858, + "learning_rate": 2.701928105886653e-08, + "loss": 0.0938, + "step": 1300 + }, + { + "epoch": 2.9591836734693877, + "grad_norm": 0.8547737002372742, + "learning_rate": 1.69358575097206e-08, + "loss": 0.0812, + "step": 1305 + }, + { + "epoch": 2.9705215419501134, + "grad_norm": 0.8027358055114746, + "learning_rate": 9.195882673916912e-09, + "loss": 0.0922, + "step": 1310 + }, + { + "epoch": 2.981859410430839, + "grad_norm": 0.8648499250411987, + "learning_rate": 3.800567133879773e-09, + "loss": 0.1003, + "step": 1315 + }, + { + "epoch": 2.9931972789115644, + "grad_norm": 0.8078812956809998, + "learning_rate": 7.507547520591018e-10, + "loss": 0.113, + "step": 1320 + }, + { + "epoch": 3.0, + "step": 1323, + "total_flos": 1.662813689058689e+18, + "train_loss": 0.4476705863227109, + "train_runtime": 712.3692, + "train_samples_per_second": 59.367, + "train_steps_per_second": 1.857 + } + ], + "logging_steps": 5, + "max_steps": 1323, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.662813689058689e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5423495075621bc0552a32913a6466afe2cab0c0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/7_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0977899b8b395df68d2d774d68e0f2f9b5fea06335594c9643b786690238a10e +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b884761f38bdd793c7f704582d56aa4c7f4c115c --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 8_128_e3_3e-5 + results: [] +--- + + + +# 8_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e26349d5858159ad54a86994ff8f10322b2f9491 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "gate_proj", + "q_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..508a0c7399225e7f3c9e1d57201708679c22f2f2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4233ea9cdd15a2f5c60e03b2f58dd11510c567d4ae12cc08f8d169e89ec71ca5 +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b7d849e306d48d1ba5f0002bc1d2f29ede231db8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.375168712117584e+18, + "train_loss": 0.44531178133908705, + "train_runtime": 594.5009, + "train_samples": 11288, + "train_samples_per_second": 56.962, + "train_steps_per_second": 1.781 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b7d849e306d48d1ba5f0002bc1d2f29ede231db8 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.375168712117584e+18, + "train_loss": 0.44531178133908705, + "train_runtime": 594.5009, + "train_samples": 11288, + "train_samples_per_second": 56.962, + "train_steps_per_second": 1.781 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..40b4a30f2442a6d34738b3c1e88812649d8bb6d2 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1520 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1059, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014164305949008499, + "grad_norm": 0.700126588344574, + "learning_rate": 2.2641509433962262e-06, + "loss": 1.6281, + "step": 5 + }, + { + "epoch": 0.028328611898016998, + "grad_norm": 0.5834487080574036, + "learning_rate": 5.094339622641509e-06, + "loss": 1.6475, + "step": 10 + }, + { + "epoch": 0.042492917847025496, + "grad_norm": 0.5358775854110718, + "learning_rate": 7.924528301886793e-06, + "loss": 1.6045, + "step": 15 + }, + { + "epoch": 0.056657223796033995, + "grad_norm": 0.4567791223526001, + "learning_rate": 1.0754716981132076e-05, + "loss": 1.5979, + "step": 20 + }, + { + "epoch": 0.0708215297450425, + "grad_norm": 0.5322059392929077, + "learning_rate": 1.358490566037736e-05, + "loss": 1.5189, + "step": 25 + }, + { + "epoch": 0.08498583569405099, + "grad_norm": 0.5226045846939087, + "learning_rate": 1.6415094339622643e-05, + "loss": 1.5515, + "step": 30 + }, + { + "epoch": 0.09915014164305949, + "grad_norm": 0.5186519622802734, + "learning_rate": 1.9245283018867924e-05, + "loss": 1.5296, + "step": 35 + }, + { + "epoch": 0.11331444759206799, + "grad_norm": 0.6856126189231873, + "learning_rate": 2.2075471698113208e-05, + "loss": 1.5209, + "step": 40 + }, + { + "epoch": 0.1274787535410765, + "grad_norm": 0.46186837553977966, + "learning_rate": 2.4905660377358492e-05, + "loss": 1.4754, + "step": 45 + }, + { + "epoch": 0.141643059490085, + "grad_norm": 0.5433881282806396, + "learning_rate": 2.7735849056603773e-05, + "loss": 1.4551, + "step": 50 + }, + { + "epoch": 0.1558073654390935, + "grad_norm": 0.482219934463501, + "learning_rate": 2.999992685835993e-05, + "loss": 1.4065, + "step": 55 + }, + { + "epoch": 0.16997167138810199, + "grad_norm": 0.5331265330314636, + "learning_rate": 2.9997366975852434e-05, + "loss": 1.4252, + "step": 60 + }, + { + "epoch": 0.18413597733711048, + "grad_norm": 0.7813170552253723, + "learning_rate": 2.999115072460336e-05, + "loss": 1.3291, + "step": 65 + }, + { + "epoch": 0.19830028328611898, + "grad_norm": 0.5390318632125854, + "learning_rate": 2.998127962013918e-05, + "loss": 1.4335, + "step": 70 + }, + { + "epoch": 0.21246458923512748, + "grad_norm": 0.5575202703475952, + "learning_rate": 2.9967756069042192e-05, + "loss": 1.3318, + "step": 75 + }, + { + "epoch": 0.22662889518413598, + "grad_norm": 0.6344984769821167, + "learning_rate": 2.9950583368363774e-05, + "loss": 1.2831, + "step": 80 + }, + { + "epoch": 0.24079320113314448, + "grad_norm": 0.6746723651885986, + "learning_rate": 2.9929765704820574e-05, + "loss": 1.293, + "step": 85 + }, + { + "epoch": 0.254957507082153, + "grad_norm": 0.7410789132118225, + "learning_rate": 2.9905308153773778e-05, + "loss": 1.2219, + "step": 90 + }, + { + "epoch": 0.26912181303116145, + "grad_norm": 0.6860549449920654, + "learning_rate": 2.9877216677991737e-05, + "loss": 1.2792, + "step": 95 + }, + { + "epoch": 0.28328611898017, + "grad_norm": 0.6441924571990967, + "learning_rate": 2.984549812619624e-05, + "loss": 1.232, + "step": 100 + }, + { + "epoch": 0.29745042492917845, + "grad_norm": 0.6719996929168701, + "learning_rate": 2.981016023139278e-05, + "loss": 1.2053, + "step": 105 + }, + { + "epoch": 0.311614730878187, + "grad_norm": 0.7621906399726868, + "learning_rate": 2.9771211608985268e-05, + "loss": 1.1308, + "step": 110 + }, + { + "epoch": 0.32577903682719545, + "grad_norm": 0.7402164340019226, + "learning_rate": 2.9728661754675553e-05, + "loss": 1.1357, + "step": 115 + }, + { + "epoch": 0.33994334277620397, + "grad_norm": 0.8971172571182251, + "learning_rate": 2.968252104214841e-05, + "loss": 1.0884, + "step": 120 + }, + { + "epoch": 0.35410764872521244, + "grad_norm": 0.8512996435165405, + "learning_rate": 2.963280072054238e-05, + "loss": 1.0927, + "step": 125 + }, + { + "epoch": 0.36827195467422097, + "grad_norm": 0.7619022727012634, + "learning_rate": 2.9579512911707257e-05, + "loss": 0.9949, + "step": 130 + }, + { + "epoch": 0.38243626062322944, + "grad_norm": 0.9913328886032104, + "learning_rate": 2.9522670607248758e-05, + "loss": 1.089, + "step": 135 + }, + { + "epoch": 0.39660056657223797, + "grad_norm": 0.919765293598175, + "learning_rate": 2.946228766536116e-05, + "loss": 1.0156, + "step": 140 + }, + { + "epoch": 0.41076487252124644, + "grad_norm": 0.8783844709396362, + "learning_rate": 2.939837880744866e-05, + "loss": 1.0346, + "step": 145 + }, + { + "epoch": 0.42492917847025496, + "grad_norm": 0.9114492535591125, + "learning_rate": 2.9330959614536314e-05, + "loss": 0.9629, + "step": 150 + }, + { + "epoch": 0.43909348441926344, + "grad_norm": 0.9207214117050171, + "learning_rate": 2.926004652347132e-05, + "loss": 0.9454, + "step": 155 + }, + { + "epoch": 0.45325779036827196, + "grad_norm": 0.9605975151062012, + "learning_rate": 2.9185656822915748e-05, + "loss": 0.9702, + "step": 160 + }, + { + "epoch": 0.46742209631728043, + "grad_norm": 0.8873898386955261, + "learning_rate": 2.910780864913153e-05, + "loss": 0.9548, + "step": 165 + }, + { + "epoch": 0.48158640226628896, + "grad_norm": 0.9931328892707825, + "learning_rate": 2.9026520981558844e-05, + "loss": 0.8941, + "step": 170 + }, + { + "epoch": 0.49575070821529743, + "grad_norm": 1.1021382808685303, + "learning_rate": 2.8941813638188887e-05, + "loss": 0.9074, + "step": 175 + }, + { + "epoch": 0.509915014164306, + "grad_norm": 1.0089614391326904, + "learning_rate": 2.8853707270732256e-05, + "loss": 0.8847, + "step": 180 + }, + { + "epoch": 0.5240793201133145, + "grad_norm": 1.0374699831008911, + "learning_rate": 2.8762223359584033e-05, + "loss": 0.8464, + "step": 185 + }, + { + "epoch": 0.5382436260623229, + "grad_norm": 0.9769449830055237, + "learning_rate": 2.8667384208586863e-05, + "loss": 0.8397, + "step": 190 + }, + { + "epoch": 0.5524079320113314, + "grad_norm": 0.9516706466674805, + "learning_rate": 2.8569212939593252e-05, + "loss": 0.7926, + "step": 195 + }, + { + "epoch": 0.56657223796034, + "grad_norm": 0.993419885635376, + "learning_rate": 2.8467733486828448e-05, + "loss": 0.8105, + "step": 200 + }, + { + "epoch": 0.5807365439093485, + "grad_norm": 0.9524616599082947, + "learning_rate": 2.8362970591055248e-05, + "loss": 0.7585, + "step": 205 + }, + { + "epoch": 0.5949008498583569, + "grad_norm": 1.0047383308410645, + "learning_rate": 2.8254949793542197e-05, + "loss": 0.7821, + "step": 210 + }, + { + "epoch": 0.6090651558073654, + "grad_norm": 1.0755858421325684, + "learning_rate": 2.81436974298366e-05, + "loss": 0.7581, + "step": 215 + }, + { + "epoch": 0.623229461756374, + "grad_norm": 1.0159295797348022, + "learning_rate": 2.8029240623343908e-05, + "loss": 0.7507, + "step": 220 + }, + { + "epoch": 0.6373937677053825, + "grad_norm": 1.0685749053955078, + "learning_rate": 2.791160727871499e-05, + "loss": 0.8104, + "step": 225 + }, + { + "epoch": 0.6515580736543909, + "grad_norm": 1.0083304643630981, + "learning_rate": 2.779082607504298e-05, + "loss": 0.7698, + "step": 230 + }, + { + "epoch": 0.6657223796033994, + "grad_norm": 1.0537629127502441, + "learning_rate": 2.7666926458871292e-05, + "loss": 0.6838, + "step": 235 + }, + { + "epoch": 0.6798866855524079, + "grad_norm": 1.1340935230255127, + "learning_rate": 2.7539938637014517e-05, + "loss": 0.6464, + "step": 240 + }, + { + "epoch": 0.6940509915014165, + "grad_norm": 1.143169641494751, + "learning_rate": 2.7409893569193998e-05, + "loss": 0.6537, + "step": 245 + }, + { + "epoch": 0.7082152974504249, + "grad_norm": 1.126880407333374, + "learning_rate": 2.7276822960489815e-05, + "loss": 0.6923, + "step": 250 + }, + { + "epoch": 0.7223796033994334, + "grad_norm": 1.1735341548919678, + "learning_rate": 2.7140759253611067e-05, + "loss": 0.728, + "step": 255 + }, + { + "epoch": 0.7365439093484419, + "grad_norm": 1.1682144403457642, + "learning_rate": 2.7001735620986323e-05, + "loss": 0.6166, + "step": 260 + }, + { + "epoch": 0.7507082152974505, + "grad_norm": 1.2577579021453857, + "learning_rate": 2.6859785956676157e-05, + "loss": 0.6132, + "step": 265 + }, + { + "epoch": 0.7648725212464589, + "grad_norm": 1.1497503519058228, + "learning_rate": 2.6714944868109744e-05, + "loss": 0.6363, + "step": 270 + }, + { + "epoch": 0.7790368271954674, + "grad_norm": 1.8539701700210571, + "learning_rate": 2.6567247667647545e-05, + "loss": 0.6361, + "step": 275 + }, + { + "epoch": 0.7932011331444759, + "grad_norm": 1.2194472551345825, + "learning_rate": 2.641673036397215e-05, + "loss": 0.5611, + "step": 280 + }, + { + "epoch": 0.8073654390934845, + "grad_norm": 1.1129488945007324, + "learning_rate": 2.626342965330931e-05, + "loss": 0.6034, + "step": 285 + }, + { + "epoch": 0.8215297450424929, + "grad_norm": 1.349648118019104, + "learning_rate": 2.6107382910481377e-05, + "loss": 0.5918, + "step": 290 + }, + { + "epoch": 0.8356940509915014, + "grad_norm": 1.2663531303405762, + "learning_rate": 2.5948628179795307e-05, + "loss": 0.5766, + "step": 295 + }, + { + "epoch": 0.8498583569405099, + "grad_norm": 1.1392652988433838, + "learning_rate": 2.5787204165767414e-05, + "loss": 0.5432, + "step": 300 + }, + { + "epoch": 0.8640226628895185, + "grad_norm": 1.3496203422546387, + "learning_rate": 2.56231502236872e-05, + "loss": 0.5922, + "step": 305 + }, + { + "epoch": 0.8781869688385269, + "grad_norm": 1.1402281522750854, + "learning_rate": 2.5456506350022493e-05, + "loss": 0.4701, + "step": 310 + }, + { + "epoch": 0.8923512747875354, + "grad_norm": 1.2046688795089722, + "learning_rate": 2.5287313172668283e-05, + "loss": 0.5505, + "step": 315 + }, + { + "epoch": 0.9065155807365439, + "grad_norm": 1.1690924167633057, + "learning_rate": 2.511561194104161e-05, + "loss": 0.5568, + "step": 320 + }, + { + "epoch": 0.9206798866855525, + "grad_norm": 1.11336350440979, + "learning_rate": 2.494144451602495e-05, + "loss": 0.5077, + "step": 325 + }, + { + "epoch": 0.9348441926345609, + "grad_norm": 1.17302668094635, + "learning_rate": 2.4764853359760448e-05, + "loss": 0.5476, + "step": 330 + }, + { + "epoch": 0.9490084985835694, + "grad_norm": 1.1316598653793335, + "learning_rate": 2.458588152529769e-05, + "loss": 0.5266, + "step": 335 + }, + { + "epoch": 0.9631728045325779, + "grad_norm": 1.1120532751083374, + "learning_rate": 2.440457264609727e-05, + "loss": 0.5006, + "step": 340 + }, + { + "epoch": 0.9773371104815864, + "grad_norm": 1.247570514678955, + "learning_rate": 2.4220970925392984e-05, + "loss": 0.5129, + "step": 345 + }, + { + "epoch": 0.9915014164305949, + "grad_norm": 1.2267568111419678, + "learning_rate": 2.403512112541498e-05, + "loss": 0.5207, + "step": 350 + }, + { + "epoch": 1.0056657223796035, + "grad_norm": 1.2525947093963623, + "learning_rate": 2.384706855647676e-05, + "loss": 0.4833, + "step": 355 + }, + { + "epoch": 1.019830028328612, + "grad_norm": 1.2962859869003296, + "learning_rate": 2.365685906592846e-05, + "loss": 0.3893, + "step": 360 + }, + { + "epoch": 1.0339943342776203, + "grad_norm": 1.1921623945236206, + "learning_rate": 2.3464539026979235e-05, + "loss": 0.451, + "step": 365 + }, + { + "epoch": 1.048158640226629, + "grad_norm": 1.1100825071334839, + "learning_rate": 2.327015532739145e-05, + "loss": 0.3921, + "step": 370 + }, + { + "epoch": 1.0623229461756374, + "grad_norm": 1.4870363473892212, + "learning_rate": 2.3073755358049395e-05, + "loss": 0.3923, + "step": 375 + }, + { + "epoch": 1.0764872521246458, + "grad_norm": 1.1867355108261108, + "learning_rate": 2.2875387001405366e-05, + "loss": 0.4434, + "step": 380 + }, + { + "epoch": 1.0906515580736544, + "grad_norm": 1.198502540588379, + "learning_rate": 2.2675098619805877e-05, + "loss": 0.3837, + "step": 385 + }, + { + "epoch": 1.1048158640226629, + "grad_norm": 1.1964575052261353, + "learning_rate": 2.2472939043700896e-05, + "loss": 0.3664, + "step": 390 + }, + { + "epoch": 1.1189801699716715, + "grad_norm": 1.2928706407546997, + "learning_rate": 2.2268957559738947e-05, + "loss": 0.3694, + "step": 395 + }, + { + "epoch": 1.13314447592068, + "grad_norm": 1.1911778450012207, + "learning_rate": 2.2063203898750987e-05, + "loss": 0.4033, + "step": 400 + }, + { + "epoch": 1.1473087818696883, + "grad_norm": 1.263422966003418, + "learning_rate": 2.1855728223625986e-05, + "loss": 0.3695, + "step": 405 + }, + { + "epoch": 1.161473087818697, + "grad_norm": 1.2540466785430908, + "learning_rate": 2.1646581117081185e-05, + "loss": 0.3253, + "step": 410 + }, + { + "epoch": 1.1756373937677054, + "grad_norm": 1.464640498161316, + "learning_rate": 2.1435813569330012e-05, + "loss": 0.3773, + "step": 415 + }, + { + "epoch": 1.1898016997167138, + "grad_norm": 1.205965280532837, + "learning_rate": 2.1223476965650586e-05, + "loss": 0.3633, + "step": 420 + }, + { + "epoch": 1.2039660056657224, + "grad_norm": 1.3502845764160156, + "learning_rate": 2.1009623073858003e-05, + "loss": 0.3125, + "step": 425 + }, + { + "epoch": 1.2181303116147308, + "grad_norm": 1.3016825914382935, + "learning_rate": 2.0794304031683267e-05, + "loss": 0.3309, + "step": 430 + }, + { + "epoch": 1.2322946175637393, + "grad_norm": 1.171027660369873, + "learning_rate": 2.0577572334062094e-05, + "loss": 0.2895, + "step": 435 + }, + { + "epoch": 1.246458923512748, + "grad_norm": 1.0805751085281372, + "learning_rate": 2.0359480820336596e-05, + "loss": 0.3621, + "step": 440 + }, + { + "epoch": 1.2606232294617563, + "grad_norm": 1.1755497455596924, + "learning_rate": 2.0140082661373034e-05, + "loss": 0.3247, + "step": 445 + }, + { + "epoch": 1.274787535410765, + "grad_norm": 1.2279369831085205, + "learning_rate": 1.9919431346598688e-05, + "loss": 0.3498, + "step": 450 + }, + { + "epoch": 1.2889518413597734, + "grad_norm": 1.4184590578079224, + "learning_rate": 1.969758067096113e-05, + "loss": 0.3389, + "step": 455 + }, + { + "epoch": 1.3031161473087818, + "grad_norm": 1.2515075206756592, + "learning_rate": 1.947458472181296e-05, + "loss": 0.3537, + "step": 460 + }, + { + "epoch": 1.3172804532577904, + "grad_norm": 1.218990683555603, + "learning_rate": 1.925049786572528e-05, + "loss": 0.344, + "step": 465 + }, + { + "epoch": 1.3314447592067988, + "grad_norm": 1.3087550401687622, + "learning_rate": 1.9025374735233067e-05, + "loss": 0.3051, + "step": 470 + }, + { + "epoch": 1.3456090651558075, + "grad_norm": 1.1275168657302856, + "learning_rate": 1.8799270215515756e-05, + "loss": 0.346, + "step": 475 + }, + { + "epoch": 1.3597733711048159, + "grad_norm": 1.1507729291915894, + "learning_rate": 1.8572239431016146e-05, + "loss": 0.3168, + "step": 480 + }, + { + "epoch": 1.3739376770538243, + "grad_norm": 1.342016577720642, + "learning_rate": 1.8344337732001073e-05, + "loss": 0.2736, + "step": 485 + }, + { + "epoch": 1.388101983002833, + "grad_norm": 1.146517038345337, + "learning_rate": 1.8115620681066946e-05, + "loss": 0.286, + "step": 490 + }, + { + "epoch": 1.4022662889518414, + "grad_norm": 1.3251285552978516, + "learning_rate": 1.7886144039593537e-05, + "loss": 0.286, + "step": 495 + }, + { + "epoch": 1.41643059490085, + "grad_norm": 1.184571385383606, + "learning_rate": 1.765596375414936e-05, + "loss": 0.2697, + "step": 500 + }, + { + "epoch": 1.4305949008498584, + "grad_norm": 1.4646106958389282, + "learning_rate": 1.74251359428518e-05, + "loss": 0.2859, + "step": 505 + }, + { + "epoch": 1.4447592067988668, + "grad_norm": 1.333466649055481, + "learning_rate": 1.7193716881685534e-05, + "loss": 0.3096, + "step": 510 + }, + { + "epoch": 1.4589235127478752, + "grad_norm": 1.2950023412704468, + "learning_rate": 1.6961762990782346e-05, + "loss": 0.2955, + "step": 515 + }, + { + "epoch": 1.4730878186968839, + "grad_norm": 1.2350502014160156, + "learning_rate": 1.6729330820665924e-05, + "loss": 0.2712, + "step": 520 + }, + { + "epoch": 1.4872521246458923, + "grad_norm": 1.168866515159607, + "learning_rate": 1.6496477038464743e-05, + "loss": 0.2726, + "step": 525 + }, + { + "epoch": 1.501416430594901, + "grad_norm": 1.139028549194336, + "learning_rate": 1.626325841409662e-05, + "loss": 0.2953, + "step": 530 + }, + { + "epoch": 1.5155807365439093, + "grad_norm": 1.2334939241409302, + "learning_rate": 1.602973180642814e-05, + "loss": 0.2933, + "step": 535 + }, + { + "epoch": 1.5297450424929178, + "grad_norm": 1.3800495862960815, + "learning_rate": 1.5795954149412445e-05, + "loss": 0.2478, + "step": 540 + }, + { + "epoch": 1.5439093484419264, + "grad_norm": 1.1974931955337524, + "learning_rate": 1.5561982438208685e-05, + "loss": 0.2675, + "step": 545 + }, + { + "epoch": 1.5580736543909348, + "grad_norm": 1.1484897136688232, + "learning_rate": 1.5327873715286553e-05, + "loss": 0.2616, + "step": 550 + }, + { + "epoch": 1.5722379603399435, + "grad_norm": 1.4212889671325684, + "learning_rate": 1.5093685056519305e-05, + "loss": 0.2193, + "step": 555 + }, + { + "epoch": 1.5864022662889519, + "grad_norm": 1.2573003768920898, + "learning_rate": 1.4859473557268607e-05, + "loss": 0.237, + "step": 560 + }, + { + "epoch": 1.6005665722379603, + "grad_norm": 1.2670422792434692, + "learning_rate": 1.4625296318464652e-05, + "loss": 0.2089, + "step": 565 + }, + { + "epoch": 1.6147308781869687, + "grad_norm": 1.1792638301849365, + "learning_rate": 1.4391210432684912e-05, + "loss": 0.2831, + "step": 570 + }, + { + "epoch": 1.6288951841359773, + "grad_norm": 1.3337515592575073, + "learning_rate": 1.4157272970234925e-05, + "loss": 0.2239, + "step": 575 + }, + { + "epoch": 1.643059490084986, + "grad_norm": 1.3405400514602661, + "learning_rate": 1.3923540965234527e-05, + "loss": 0.2247, + "step": 580 + }, + { + "epoch": 1.6572237960339944, + "grad_norm": 1.1143791675567627, + "learning_rate": 1.3690071401712863e-05, + "loss": 0.2231, + "step": 585 + }, + { + "epoch": 1.6713881019830028, + "grad_norm": 1.1400721073150635, + "learning_rate": 1.345692119971567e-05, + "loss": 0.2282, + "step": 590 + }, + { + "epoch": 1.6855524079320112, + "grad_norm": 1.2618237733840942, + "learning_rate": 1.322414720142812e-05, + "loss": 0.2387, + "step": 595 + }, + { + "epoch": 1.6997167138810199, + "grad_norm": 1.1092174053192139, + "learning_rate": 1.2991806157316647e-05, + "loss": 0.2038, + "step": 600 + }, + { + "epoch": 1.7138810198300283, + "grad_norm": 1.0898807048797607, + "learning_rate": 1.2759954712293147e-05, + "loss": 0.1852, + "step": 605 + }, + { + "epoch": 1.728045325779037, + "grad_norm": 1.0032293796539307, + "learning_rate": 1.2528649391904928e-05, + "loss": 0.2065, + "step": 610 + }, + { + "epoch": 1.7422096317280453, + "grad_norm": 1.0647224187850952, + "learning_rate": 1.2297946588553688e-05, + "loss": 0.1967, + "step": 615 + }, + { + "epoch": 1.7563739376770537, + "grad_norm": 1.2690836191177368, + "learning_rate": 1.2067902547747076e-05, + "loss": 0.2067, + "step": 620 + }, + { + "epoch": 1.7705382436260622, + "grad_norm": 1.2006828784942627, + "learning_rate": 1.1838573354385947e-05, + "loss": 0.2451, + "step": 625 + }, + { + "epoch": 1.7847025495750708, + "grad_norm": 1.267482042312622, + "learning_rate": 1.1610014919090847e-05, + "loss": 0.2103, + "step": 630 + }, + { + "epoch": 1.7988668555240794, + "grad_norm": 1.3417218923568726, + "learning_rate": 1.1382282964570956e-05, + "loss": 0.1951, + "step": 635 + }, + { + "epoch": 1.8130311614730878, + "grad_norm": 1.150628685951233, + "learning_rate": 1.1155433012038847e-05, + "loss": 0.205, + "step": 640 + }, + { + "epoch": 1.8271954674220963, + "grad_norm": 1.113037347793579, + "learning_rate": 1.0929520367674389e-05, + "loss": 0.2084, + "step": 645 + }, + { + "epoch": 1.8413597733711047, + "grad_norm": 1.3205279111862183, + "learning_rate": 1.0704600109141043e-05, + "loss": 0.1717, + "step": 650 + }, + { + "epoch": 1.8555240793201133, + "grad_norm": 1.1768556833267212, + "learning_rate": 1.0480727072157912e-05, + "loss": 0.1978, + "step": 655 + }, + { + "epoch": 1.869688385269122, + "grad_norm": 1.2845211029052734, + "learning_rate": 1.0257955837130725e-05, + "loss": 0.196, + "step": 660 + }, + { + "epoch": 1.8838526912181304, + "grad_norm": 1.101710319519043, + "learning_rate": 1.0036340715845118e-05, + "loss": 0.2125, + "step": 665 + }, + { + "epoch": 1.8980169971671388, + "grad_norm": 1.134294867515564, + "learning_rate": 9.815935738225376e-06, + "loss": 0.1948, + "step": 670 + }, + { + "epoch": 1.9121813031161472, + "grad_norm": 1.3456953763961792, + "learning_rate": 9.596794639161892e-06, + "loss": 0.1899, + "step": 675 + }, + { + "epoch": 1.9263456090651558, + "grad_norm": 1.3517192602157593, + "learning_rate": 9.37897084541057e-06, + "loss": 0.1683, + "step": 680 + }, + { + "epoch": 1.9405099150141643, + "grad_norm": 1.0507384538650513, + "learning_rate": 9.16251746256734e-06, + "loss": 0.154, + "step": 685 + }, + { + "epoch": 1.954674220963173, + "grad_norm": 1.047144889831543, + "learning_rate": 8.94748726212097e-06, + "loss": 0.1699, + "step": 690 + }, + { + "epoch": 1.9688385269121813, + "grad_norm": 1.1450303792953491, + "learning_rate": 8.733932668587371e-06, + "loss": 0.1657, + "step": 695 + }, + { + "epoch": 1.9830028328611897, + "grad_norm": 1.1669367551803589, + "learning_rate": 8.521905746728408e-06, + "loss": 0.1532, + "step": 700 + }, + { + "epoch": 1.9971671388101981, + "grad_norm": 1.1999348402023315, + "learning_rate": 8.311458188858525e-06, + "loss": 0.1479, + "step": 705 + }, + { + "epoch": 2.011331444759207, + "grad_norm": 1.0395151376724243, + "learning_rate": 8.102641302242104e-06, + "loss": 0.1635, + "step": 710 + }, + { + "epoch": 2.0254957507082154, + "grad_norm": 1.1235429048538208, + "learning_rate": 7.89550599658469e-06, + "loss": 0.1404, + "step": 715 + }, + { + "epoch": 2.039660056657224, + "grad_norm": 0.8213925361633301, + "learning_rate": 7.69010277162122e-06, + "loss": 0.1236, + "step": 720 + }, + { + "epoch": 2.0538243626062322, + "grad_norm": 0.9484227895736694, + "learning_rate": 7.486481704804117e-06, + "loss": 0.114, + "step": 725 + }, + { + "epoch": 2.0679886685552407, + "grad_norm": 0.9512398838996887, + "learning_rate": 7.284692439094369e-06, + "loss": 0.1185, + "step": 730 + }, + { + "epoch": 2.0821529745042495, + "grad_norm": 1.0123506784439087, + "learning_rate": 7.084784170858566e-06, + "loss": 0.1329, + "step": 735 + }, + { + "epoch": 2.096317280453258, + "grad_norm": 0.9752940535545349, + "learning_rate": 6.8868056378747715e-06, + "loss": 0.1272, + "step": 740 + }, + { + "epoch": 2.1104815864022664, + "grad_norm": 1.1549034118652344, + "learning_rate": 6.690805107450209e-06, + "loss": 0.1339, + "step": 745 + }, + { + "epoch": 2.1246458923512748, + "grad_norm": 1.4165195226669312, + "learning_rate": 6.496830364653691e-06, + "loss": 0.1456, + "step": 750 + }, + { + "epoch": 2.138810198300283, + "grad_norm": 1.0737555027008057, + "learning_rate": 6.304928700665545e-06, + "loss": 0.1371, + "step": 755 + }, + { + "epoch": 2.1529745042492916, + "grad_norm": 0.9517489075660706, + "learning_rate": 6.115146901248015e-06, + "loss": 0.1159, + "step": 760 + }, + { + "epoch": 2.1671388101983005, + "grad_norm": 0.8344389796257019, + "learning_rate": 5.9275312353388635e-06, + "loss": 0.1181, + "step": 765 + }, + { + "epoch": 2.181303116147309, + "grad_norm": 0.9729456901550293, + "learning_rate": 5.7421274437709586e-06, + "loss": 0.1241, + "step": 770 + }, + { + "epoch": 2.1954674220963173, + "grad_norm": 0.9029666781425476, + "learning_rate": 5.558980728120618e-06, + "loss": 0.1004, + "step": 775 + }, + { + "epoch": 2.2096317280453257, + "grad_norm": 1.1208617687225342, + "learning_rate": 5.378135739687457e-06, + "loss": 0.1463, + "step": 780 + }, + { + "epoch": 2.223796033994334, + "grad_norm": 0.8783971667289734, + "learning_rate": 5.199636568608363e-06, + "loss": 0.1127, + "step": 785 + }, + { + "epoch": 2.237960339943343, + "grad_norm": 1.0620113611221313, + "learning_rate": 5.023526733108258e-06, + "loss": 0.1127, + "step": 790 + }, + { + "epoch": 2.2521246458923514, + "grad_norm": 0.9393536448478699, + "learning_rate": 4.849849168890375e-06, + "loss": 0.1064, + "step": 795 + }, + { + "epoch": 2.26628895184136, + "grad_norm": 0.898922860622406, + "learning_rate": 4.678646218668473e-06, + "loss": 0.0929, + "step": 800 + }, + { + "epoch": 2.2804532577903682, + "grad_norm": 1.0720484256744385, + "learning_rate": 4.509959621843638e-06, + "loss": 0.1099, + "step": 805 + }, + { + "epoch": 2.2946175637393766, + "grad_norm": 0.8173096179962158, + "learning_rate": 4.3438305043282315e-06, + "loss": 0.0993, + "step": 810 + }, + { + "epoch": 2.308781869688385, + "grad_norm": 0.9477612376213074, + "learning_rate": 4.180299368519332e-06, + "loss": 0.1126, + "step": 815 + }, + { + "epoch": 2.322946175637394, + "grad_norm": 1.1620317697525024, + "learning_rate": 4.019406083424222e-06, + "loss": 0.1304, + "step": 820 + }, + { + "epoch": 2.3371104815864023, + "grad_norm": 1.047582745552063, + "learning_rate": 3.861189874940302e-06, + "loss": 0.1073, + "step": 825 + }, + { + "epoch": 2.3512747875354107, + "grad_norm": 0.8818837404251099, + "learning_rate": 3.7056893162918064e-06, + "loss": 0.1073, + "step": 830 + }, + { + "epoch": 2.365439093484419, + "grad_norm": 1.0614814758300781, + "learning_rate": 3.5529423186255833e-06, + "loss": 0.1402, + "step": 835 + }, + { + "epoch": 2.3796033994334276, + "grad_norm": 1.2329021692276, + "learning_rate": 3.4029861217683743e-06, + "loss": 0.1125, + "step": 840 + }, + { + "epoch": 2.3937677053824364, + "grad_norm": 0.9971024394035339, + "learning_rate": 3.2558572851476903e-06, + "loss": 0.1124, + "step": 845 + }, + { + "epoch": 2.407932011331445, + "grad_norm": 0.7330763339996338, + "learning_rate": 3.111591678878596e-06, + "loss": 0.1146, + "step": 850 + }, + { + "epoch": 2.4220963172804533, + "grad_norm": 1.0510387420654297, + "learning_rate": 2.9702244750185724e-06, + "loss": 0.1101, + "step": 855 + }, + { + "epoch": 2.4362606232294617, + "grad_norm": 0.9393291473388672, + "learning_rate": 2.831790138992526e-06, + "loss": 0.1009, + "step": 860 + }, + { + "epoch": 2.45042492917847, + "grad_norm": 0.7127835750579834, + "learning_rate": 2.696322421190091e-06, + "loss": 0.0884, + "step": 865 + }, + { + "epoch": 2.4645892351274785, + "grad_norm": 0.760395884513855, + "learning_rate": 2.563854348737275e-06, + "loss": 0.1116, + "step": 870 + }, + { + "epoch": 2.4787535410764874, + "grad_norm": 1.0397846698760986, + "learning_rate": 2.434418217444419e-06, + "loss": 0.1104, + "step": 875 + }, + { + "epoch": 2.492917847025496, + "grad_norm": 0.9772678017616272, + "learning_rate": 2.3080455839324342e-06, + "loss": 0.1153, + "step": 880 + }, + { + "epoch": 2.507082152974504, + "grad_norm": 0.7183725237846375, + "learning_rate": 2.184767257939312e-06, + "loss": 0.097, + "step": 885 + }, + { + "epoch": 2.5212464589235126, + "grad_norm": 0.7582363486289978, + "learning_rate": 2.064613294808664e-06, + "loss": 0.0996, + "step": 890 + }, + { + "epoch": 2.5354107648725215, + "grad_norm": 0.7273741960525513, + "learning_rate": 1.947612988162197e-06, + "loss": 0.1043, + "step": 895 + }, + { + "epoch": 2.54957507082153, + "grad_norm": 0.8927783370018005, + "learning_rate": 1.8337948627579398e-06, + "loss": 0.1085, + "step": 900 + }, + { + "epoch": 2.5637393767705383, + "grad_norm": 0.8515971302986145, + "learning_rate": 1.7231866675358704e-06, + "loss": 0.0957, + "step": 905 + }, + { + "epoch": 2.5779036827195467, + "grad_norm": 0.7225885987281799, + "learning_rate": 1.6158153688526893e-06, + "loss": 0.0949, + "step": 910 + }, + { + "epoch": 2.592067988668555, + "grad_norm": 0.7432470917701721, + "learning_rate": 1.5117071439074305e-06, + "loss": 0.0907, + "step": 915 + }, + { + "epoch": 2.6062322946175636, + "grad_norm": 0.8148579597473145, + "learning_rate": 1.4108873743594275e-06, + "loss": 0.1206, + "step": 920 + }, + { + "epoch": 2.620396600566572, + "grad_norm": 0.8324195146560669, + "learning_rate": 1.3133806401402376e-06, + "loss": 0.0969, + "step": 925 + }, + { + "epoch": 2.634560906515581, + "grad_norm": 0.7272660136222839, + "learning_rate": 1.2192107134610586e-06, + "loss": 0.0993, + "step": 930 + }, + { + "epoch": 2.6487252124645893, + "grad_norm": 0.6482411026954651, + "learning_rate": 1.1284005530170305e-06, + "loss": 0.0901, + "step": 935 + }, + { + "epoch": 2.6628895184135977, + "grad_norm": 0.9842537641525269, + "learning_rate": 1.0409722983898928e-06, + "loss": 0.1282, + "step": 940 + }, + { + "epoch": 2.677053824362606, + "grad_norm": 0.7460909485816956, + "learning_rate": 9.569472646503424e-07, + "loss": 0.0966, + "step": 945 + }, + { + "epoch": 2.691218130311615, + "grad_norm": 0.7169206142425537, + "learning_rate": 8.763459371614036e-07, + "loss": 0.0837, + "step": 950 + }, + { + "epoch": 2.7053824362606234, + "grad_norm": 0.7621840834617615, + "learning_rate": 7.991879665840745e-07, + "loss": 0.1044, + "step": 955 + }, + { + "epoch": 2.7195467422096318, + "grad_norm": 0.6485282182693481, + "learning_rate": 7.254921640864953e-07, + "loss": 0.0853, + "step": 960 + }, + { + "epoch": 2.73371104815864, + "grad_norm": 0.7247589826583862, + "learning_rate": 6.55276496757759e-07, + "loss": 0.0855, + "step": 965 + }, + { + "epoch": 2.7478753541076486, + "grad_norm": 0.8195169568061829, + "learning_rate": 5.885580832275244e-07, + "loss": 0.0931, + "step": 970 + }, + { + "epoch": 2.762039660056657, + "grad_norm": 0.7408338785171509, + "learning_rate": 5.253531894924962e-07, + "loss": 0.0948, + "step": 975 + }, + { + "epoch": 2.776203966005666, + "grad_norm": 0.7429975271224976, + "learning_rate": 4.6567722495074685e-07, + "loss": 0.0929, + "step": 980 + }, + { + "epoch": 2.7903682719546743, + "grad_norm": 0.8037291169166565, + "learning_rate": 4.0954473864489693e-07, + "loss": 0.0901, + "step": 985 + }, + { + "epoch": 2.8045325779036827, + "grad_norm": 0.8908680081367493, + "learning_rate": 3.5696941571505436e-07, + "loss": 0.0943, + "step": 990 + }, + { + "epoch": 2.818696883852691, + "grad_norm": 0.607088029384613, + "learning_rate": 3.079640740623679e-07, + "loss": 0.0753, + "step": 995 + }, + { + "epoch": 2.8328611898017, + "grad_norm": 0.6386492848396301, + "learning_rate": 2.625406612240039e-07, + "loss": 0.0969, + "step": 1000 + }, + { + "epoch": 2.8470254957507084, + "grad_norm": 0.9213368892669678, + "learning_rate": 2.207102514603393e-07, + "loss": 0.1003, + "step": 1005 + }, + { + "epoch": 2.861189801699717, + "grad_norm": 0.7016822695732117, + "learning_rate": 1.8248304305504505e-07, + "loss": 0.0819, + "step": 1010 + }, + { + "epoch": 2.8753541076487252, + "grad_norm": 0.7725436687469482, + "learning_rate": 1.4786835582873137e-07, + "loss": 0.1167, + "step": 1015 + }, + { + "epoch": 2.8895184135977336, + "grad_norm": 0.7689378261566162, + "learning_rate": 1.1687462886677713e-07, + "loss": 0.098, + "step": 1020 + }, + { + "epoch": 2.903682719546742, + "grad_norm": 0.6145092844963074, + "learning_rate": 8.950941846187721e-08, + "loss": 0.0986, + "step": 1025 + }, + { + "epoch": 2.9178470254957505, + "grad_norm": 0.6849756240844727, + "learning_rate": 6.577939627179785e-08, + "loss": 0.0928, + "step": 1030 + }, + { + "epoch": 2.9320113314447593, + "grad_norm": 0.5891423225402832, + "learning_rate": 4.5690347692837755e-08, + "loss": 0.096, + "step": 1035 + }, + { + "epoch": 2.9461756373937678, + "grad_norm": 0.722785472869873, + "learning_rate": 2.9247170449338e-08, + "loss": 0.0966, + "step": 1040 + }, + { + "epoch": 2.960339943342776, + "grad_norm": 0.7861665487289429, + "learning_rate": 1.6453873399610576e-08, + "loss": 0.0898, + "step": 1045 + }, + { + "epoch": 2.9745042492917846, + "grad_norm": 0.746281623840332, + "learning_rate": 7.313575558583474e-09, + "loss": 0.0833, + "step": 1050 + }, + { + "epoch": 2.9886685552407934, + "grad_norm": 0.7537555694580078, + "learning_rate": 1.8285053373706673e-09, + "loss": 0.0998, + "step": 1055 + }, + { + "epoch": 3.0, + "step": 1059, + "total_flos": 1.375168712117584e+18, + "train_loss": 0.44531178133908705, + "train_runtime": 594.5009, + "train_samples_per_second": 56.962, + "train_steps_per_second": 1.781 + } + ], + "logging_steps": 5, + "max_steps": 1059, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.375168712117584e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..11c2469b47f9cd3591307989c633970e7aa5d898 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/8_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f387af027dc13088639d19d526094a302121f13fdd18112b48757de05e4fd42 +size 8273 diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/README.md b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6e812dc9fa124d0a55a8df021cf2dad833972c13 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/hotpotqa/train/knowledge_50 +model-index: +- name: 9_128_e3_3e-5 + results: [] +--- + + + +# 9_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/hotpotqa/train/knowledge_50 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- total_eval_batch_size: 64 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/adapter_config.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1501ee138e682524a39871d17251e67ce2a798d0 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "down_proj", + "o_proj", + "v_proj", + "gate_proj", + "k_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/adapter_model.safetensors b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1138dbc75f228c16dcd4fe86c45ba1e3956e6789 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88977f9d926dbd3acf62e8ed4a7d8b41b3423b48bc931618df3b04033fa6367b +size 671150064 diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/all_results.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9276f2da32938250f86bfe16cfd8ede37c0bde5d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2911580544436797e+18, + "train_loss": 0.4547977649801692, + "train_runtime": 560.3042, + "train_samples": 10993, + "train_samples_per_second": 58.859, + "train_steps_per_second": 1.842 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/chat_template.jinja b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/config.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/special_tokens_map.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/tokenizer.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/tokenizer_config.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/train_results.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9276f2da32938250f86bfe16cfd8ede37c0bde5d --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.2911580544436797e+18, + "train_loss": 0.4547977649801692, + "train_runtime": 560.3042, + "train_samples": 10993, + "train_samples_per_second": 58.859, + "train_steps_per_second": 1.842 +} \ No newline at end of file diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/trainer_state.json b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..70a1df60ec70439d7a04408bf57f975dac3973ed --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/trainer_state.json @@ -0,0 +1,1485 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1032, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.014534883720930232, + "grad_norm": 0.7206381559371948, + "learning_rate": 2.307692307692308e-06, + "loss": 1.6669, + "step": 5 + }, + { + "epoch": 0.029069767441860465, + "grad_norm": 0.640861451625824, + "learning_rate": 5.192307692307692e-06, + "loss": 1.6412, + "step": 10 + }, + { + "epoch": 0.0436046511627907, + "grad_norm": 0.627522349357605, + "learning_rate": 8.076923076923077e-06, + "loss": 1.7331, + "step": 15 + }, + { + "epoch": 0.05813953488372093, + "grad_norm": 0.5616913437843323, + "learning_rate": 1.0961538461538462e-05, + "loss": 1.6036, + "step": 20 + }, + { + "epoch": 0.07267441860465117, + "grad_norm": 0.4732136130332947, + "learning_rate": 1.3846153846153847e-05, + "loss": 1.6481, + "step": 25 + }, + { + "epoch": 0.0872093023255814, + "grad_norm": 0.48391854763031006, + "learning_rate": 1.673076923076923e-05, + "loss": 1.6794, + "step": 30 + }, + { + "epoch": 0.10174418604651163, + "grad_norm": 0.4557724893093109, + "learning_rate": 1.9615384615384617e-05, + "loss": 1.5866, + "step": 35 + }, + { + "epoch": 0.11627906976744186, + "grad_norm": 0.4915844798088074, + "learning_rate": 2.25e-05, + "loss": 1.5845, + "step": 40 + }, + { + "epoch": 0.1308139534883721, + "grad_norm": 0.4952607750892639, + "learning_rate": 2.5384615384615386e-05, + "loss": 1.5462, + "step": 45 + }, + { + "epoch": 0.14534883720930233, + "grad_norm": 0.5318689346313477, + "learning_rate": 2.8269230769230768e-05, + "loss": 1.4998, + "step": 50 + }, + { + "epoch": 0.15988372093023256, + "grad_norm": 0.5693638324737549, + "learning_rate": 2.999969170437549e-05, + "loss": 1.4929, + "step": 55 + }, + { + "epoch": 0.1744186046511628, + "grad_norm": 0.5858587026596069, + "learning_rate": 2.99962235241376e-05, + "loss": 1.3714, + "step": 60 + }, + { + "epoch": 0.18895348837209303, + "grad_norm": 0.6174035668373108, + "learning_rate": 2.9988902688106014e-05, + "loss": 1.4556, + "step": 65 + }, + { + "epoch": 0.20348837209302326, + "grad_norm": 0.6102719902992249, + "learning_rate": 2.9977731077065013e-05, + "loss": 1.4014, + "step": 70 + }, + { + "epoch": 0.2180232558139535, + "grad_norm": 0.6282282471656799, + "learning_rate": 2.996271156109531e-05, + "loss": 1.3691, + "step": 75 + }, + { + "epoch": 0.23255813953488372, + "grad_norm": 0.6023404002189636, + "learning_rate": 2.9943847998836723e-05, + "loss": 1.3208, + "step": 80 + }, + { + "epoch": 0.24709302325581395, + "grad_norm": 0.6220393776893616, + "learning_rate": 2.992114523649686e-05, + "loss": 1.2862, + "step": 85 + }, + { + "epoch": 0.2616279069767442, + "grad_norm": 0.6808298230171204, + "learning_rate": 2.9894609106606067e-05, + "loss": 1.2945, + "step": 90 + }, + { + "epoch": 0.2761627906976744, + "grad_norm": 0.7573732137680054, + "learning_rate": 2.9864246426519023e-05, + "loss": 1.2738, + "step": 95 + }, + { + "epoch": 0.29069767441860467, + "grad_norm": 0.7976464033126831, + "learning_rate": 2.983006499666329e-05, + "loss": 1.1874, + "step": 100 + }, + { + "epoch": 0.30523255813953487, + "grad_norm": 0.8071194291114807, + "learning_rate": 2.9792073598535322e-05, + "loss": 1.1452, + "step": 105 + }, + { + "epoch": 0.31976744186046513, + "grad_norm": 0.8134217262268066, + "learning_rate": 2.9750281992444442e-05, + "loss": 1.134, + "step": 110 + }, + { + "epoch": 0.33430232558139533, + "grad_norm": 0.8804463744163513, + "learning_rate": 2.970470091500531e-05, + "loss": 1.0964, + "step": 115 + }, + { + "epoch": 0.3488372093023256, + "grad_norm": 0.9351754784584045, + "learning_rate": 2.9655342076379596e-05, + "loss": 1.0307, + "step": 120 + }, + { + "epoch": 0.3633720930232558, + "grad_norm": 0.8489453196525574, + "learning_rate": 2.9602218157267572e-05, + "loss": 1.0253, + "step": 125 + }, + { + "epoch": 0.37790697674418605, + "grad_norm": 0.7934521436691284, + "learning_rate": 2.9545342805650304e-05, + "loss": 1.0798, + "step": 130 + }, + { + "epoch": 0.39244186046511625, + "grad_norm": 0.8705933690071106, + "learning_rate": 2.9484730633283385e-05, + "loss": 1.0175, + "step": 135 + }, + { + "epoch": 0.4069767441860465, + "grad_norm": 0.9178513884544373, + "learning_rate": 2.942039721194304e-05, + "loss": 1.1161, + "step": 140 + }, + { + "epoch": 0.42151162790697677, + "grad_norm": 0.9335429668426514, + "learning_rate": 2.935235906942563e-05, + "loss": 1.0206, + "step": 145 + }, + { + "epoch": 0.436046511627907, + "grad_norm": 0.8570887446403503, + "learning_rate": 2.92806336853015e-05, + "loss": 0.9844, + "step": 150 + }, + { + "epoch": 0.45058139534883723, + "grad_norm": 0.9924285411834717, + "learning_rate": 2.920523948642432e-05, + "loss": 0.9529, + "step": 155 + }, + { + "epoch": 0.46511627906976744, + "grad_norm": 1.2329226732254028, + "learning_rate": 2.9126195842197113e-05, + "loss": 0.9636, + "step": 160 + }, + { + "epoch": 0.4796511627906977, + "grad_norm": 0.9957266449928284, + "learning_rate": 2.904352305959606e-05, + "loss": 0.9415, + "step": 165 + }, + { + "epoch": 0.4941860465116279, + "grad_norm": 1.0989147424697876, + "learning_rate": 2.895724237795347e-05, + "loss": 0.9765, + "step": 170 + }, + { + "epoch": 0.5087209302325582, + "grad_norm": 0.9607624411582947, + "learning_rate": 2.8867375963501223e-05, + "loss": 0.9151, + "step": 175 + }, + { + "epoch": 0.5232558139534884, + "grad_norm": 1.2114003896713257, + "learning_rate": 2.8773946903676092e-05, + "loss": 0.8421, + "step": 180 + }, + { + "epoch": 0.5377906976744186, + "grad_norm": 1.052228331565857, + "learning_rate": 2.8676979201188352e-05, + "loss": 0.8519, + "step": 185 + }, + { + "epoch": 0.5523255813953488, + "grad_norm": 1.2027859687805176, + "learning_rate": 2.8576497767855325e-05, + "loss": 0.8604, + "step": 190 + }, + { + "epoch": 0.5668604651162791, + "grad_norm": 1.0266679525375366, + "learning_rate": 2.8472528418201283e-05, + "loss": 0.7931, + "step": 195 + }, + { + "epoch": 0.5813953488372093, + "grad_norm": 1.2057733535766602, + "learning_rate": 2.8365097862825516e-05, + "loss": 0.8307, + "step": 200 + }, + { + "epoch": 0.5959302325581395, + "grad_norm": 1.2363107204437256, + "learning_rate": 2.825423370154013e-05, + "loss": 0.7787, + "step": 205 + }, + { + "epoch": 0.6104651162790697, + "grad_norm": 1.2585008144378662, + "learning_rate": 2.8139964416279427e-05, + "loss": 0.8151, + "step": 210 + }, + { + "epoch": 0.625, + "grad_norm": 1.1843583583831787, + "learning_rate": 2.8022319363782676e-05, + "loss": 0.7404, + "step": 215 + }, + { + "epoch": 0.6395348837209303, + "grad_norm": 1.1536660194396973, + "learning_rate": 2.7901328768052095e-05, + "loss": 0.7857, + "step": 220 + }, + { + "epoch": 0.6540697674418605, + "grad_norm": 1.2260042428970337, + "learning_rate": 2.7777023712588064e-05, + "loss": 0.7594, + "step": 225 + }, + { + "epoch": 0.6686046511627907, + "grad_norm": 1.1598570346832275, + "learning_rate": 2.7649436132403513e-05, + "loss": 0.6839, + "step": 230 + }, + { + "epoch": 0.6831395348837209, + "grad_norm": 1.2394260168075562, + "learning_rate": 2.7518598805819542e-05, + "loss": 0.7574, + "step": 235 + }, + { + "epoch": 0.6976744186046512, + "grad_norm": 1.298935055732727, + "learning_rate": 2.7384545346044402e-05, + "loss": 0.7439, + "step": 240 + }, + { + "epoch": 0.7122093023255814, + "grad_norm": 1.3825507164001465, + "learning_rate": 2.7247310192537978e-05, + "loss": 0.7379, + "step": 245 + }, + { + "epoch": 0.7267441860465116, + "grad_norm": 1.3109550476074219, + "learning_rate": 2.7106928602164006e-05, + "loss": 0.67, + "step": 250 + }, + { + "epoch": 0.7412790697674418, + "grad_norm": 1.11746084690094, + "learning_rate": 2.696343664013227e-05, + "loss": 0.623, + "step": 255 + }, + { + "epoch": 0.7558139534883721, + "grad_norm": 1.4749195575714111, + "learning_rate": 2.681687117073317e-05, + "loss": 0.6376, + "step": 260 + }, + { + "epoch": 0.7703488372093024, + "grad_norm": 1.2236706018447876, + "learning_rate": 2.666726984786696e-05, + "loss": 0.6694, + "step": 265 + }, + { + "epoch": 0.7848837209302325, + "grad_norm": 1.3780286312103271, + "learning_rate": 2.6514671105370166e-05, + "loss": 0.6722, + "step": 270 + }, + { + "epoch": 0.7994186046511628, + "grad_norm": 1.1760295629501343, + "learning_rate": 2.635911414714158e-05, + "loss": 0.6041, + "step": 275 + }, + { + "epoch": 0.813953488372093, + "grad_norm": 1.2596440315246582, + "learning_rate": 2.6200638937070474e-05, + "loss": 0.6016, + "step": 280 + }, + { + "epoch": 0.8284883720930233, + "grad_norm": 1.0804909467697144, + "learning_rate": 2.6039286188769527e-05, + "loss": 0.64, + "step": 285 + }, + { + "epoch": 0.8430232558139535, + "grad_norm": 1.5202122926712036, + "learning_rate": 2.587509735511516e-05, + "loss": 0.6752, + "step": 290 + }, + { + "epoch": 0.8575581395348837, + "grad_norm": 1.3087544441223145, + "learning_rate": 2.5708114617597946e-05, + "loss": 0.5625, + "step": 295 + }, + { + "epoch": 0.872093023255814, + "grad_norm": 1.3173868656158447, + "learning_rate": 2.553838087548584e-05, + "loss": 0.592, + "step": 300 + }, + { + "epoch": 0.8866279069767442, + "grad_norm": 1.3611654043197632, + "learning_rate": 2.5365939734802973e-05, + "loss": 0.5685, + "step": 305 + }, + { + "epoch": 0.9011627906976745, + "grad_norm": 1.1728929281234741, + "learning_rate": 2.5190835497126915e-05, + "loss": 0.5388, + "step": 310 + }, + { + "epoch": 0.9156976744186046, + "grad_norm": 1.2730668783187866, + "learning_rate": 2.501311314820722e-05, + "loss": 0.5013, + "step": 315 + }, + { + "epoch": 0.9302325581395349, + "grad_norm": 1.2505439519882202, + "learning_rate": 2.4832818346408228e-05, + "loss": 0.5199, + "step": 320 + }, + { + "epoch": 0.9447674418604651, + "grad_norm": 1.1878310441970825, + "learning_rate": 2.4649997410979012e-05, + "loss": 0.5002, + "step": 325 + }, + { + "epoch": 0.9593023255813954, + "grad_norm": 1.2723339796066284, + "learning_rate": 2.446469731015361e-05, + "loss": 0.5843, + "step": 330 + }, + { + "epoch": 0.9738372093023255, + "grad_norm": 1.2374998331069946, + "learning_rate": 2.4276965649084474e-05, + "loss": 0.501, + "step": 335 + }, + { + "epoch": 0.9883720930232558, + "grad_norm": 1.2415424585342407, + "learning_rate": 2.40868506576123e-05, + "loss": 0.527, + "step": 340 + }, + { + "epoch": 1.002906976744186, + "grad_norm": 1.2524995803833008, + "learning_rate": 2.3894401177875386e-05, + "loss": 0.4908, + "step": 345 + }, + { + "epoch": 1.0174418604651163, + "grad_norm": 1.380611538887024, + "learning_rate": 2.369966665176168e-05, + "loss": 0.4634, + "step": 350 + }, + { + "epoch": 1.0319767441860466, + "grad_norm": 1.2016781568527222, + "learning_rate": 2.350269710820675e-05, + "loss": 0.4358, + "step": 355 + }, + { + "epoch": 1.0465116279069768, + "grad_norm": 1.3040598630905151, + "learning_rate": 2.330354315034089e-05, + "loss": 0.4316, + "step": 360 + }, + { + "epoch": 1.0610465116279069, + "grad_norm": 1.2374192476272583, + "learning_rate": 2.3102255942488804e-05, + "loss": 0.4216, + "step": 365 + }, + { + "epoch": 1.0755813953488371, + "grad_norm": 1.2766934633255005, + "learning_rate": 2.2898887197025023e-05, + "loss": 0.3879, + "step": 370 + }, + { + "epoch": 1.0901162790697674, + "grad_norm": 1.3625224828720093, + "learning_rate": 2.2693489161088592e-05, + "loss": 0.413, + "step": 375 + }, + { + "epoch": 1.1046511627906976, + "grad_norm": 1.2175023555755615, + "learning_rate": 2.248611460316031e-05, + "loss": 0.3843, + "step": 380 + }, + { + "epoch": 1.119186046511628, + "grad_norm": 1.3372212648391724, + "learning_rate": 2.227681679950608e-05, + "loss": 0.3894, + "step": 385 + }, + { + "epoch": 1.1337209302325582, + "grad_norm": 1.2627370357513428, + "learning_rate": 2.2065649520489798e-05, + "loss": 0.3807, + "step": 390 + }, + { + "epoch": 1.1482558139534884, + "grad_norm": 1.2970643043518066, + "learning_rate": 2.1852667016759273e-05, + "loss": 0.3647, + "step": 395 + }, + { + "epoch": 1.1627906976744187, + "grad_norm": 1.2546086311340332, + "learning_rate": 2.1637924005308797e-05, + "loss": 0.3509, + "step": 400 + }, + { + "epoch": 1.177325581395349, + "grad_norm": 1.320049524307251, + "learning_rate": 2.1421475655421887e-05, + "loss": 0.4081, + "step": 405 + }, + { + "epoch": 1.191860465116279, + "grad_norm": 1.350244402885437, + "learning_rate": 2.120337757449781e-05, + "loss": 0.3325, + "step": 410 + }, + { + "epoch": 1.2063953488372092, + "grad_norm": 1.3032337427139282, + "learning_rate": 2.0983685793765626e-05, + "loss": 0.3763, + "step": 415 + }, + { + "epoch": 1.2209302325581395, + "grad_norm": 1.3012064695358276, + "learning_rate": 2.076245675388924e-05, + "loss": 0.3317, + "step": 420 + }, + { + "epoch": 1.2354651162790697, + "grad_norm": 1.3643220663070679, + "learning_rate": 2.0539747290467348e-05, + "loss": 0.3342, + "step": 425 + }, + { + "epoch": 1.25, + "grad_norm": 1.2481119632720947, + "learning_rate": 2.03156146194319e-05, + "loss": 0.3451, + "step": 430 + }, + { + "epoch": 1.2645348837209303, + "grad_norm": 1.485145926475525, + "learning_rate": 2.0090116322348816e-05, + "loss": 0.3666, + "step": 435 + }, + { + "epoch": 1.2790697674418605, + "grad_norm": 1.531805396080017, + "learning_rate": 1.9863310331624848e-05, + "loss": 0.3511, + "step": 440 + }, + { + "epoch": 1.2936046511627908, + "grad_norm": 1.4746474027633667, + "learning_rate": 1.963525491562421e-05, + "loss": 0.3327, + "step": 445 + }, + { + "epoch": 1.308139534883721, + "grad_norm": 1.0864694118499756, + "learning_rate": 1.9406008663698973e-05, + "loss": 0.3059, + "step": 450 + }, + { + "epoch": 1.322674418604651, + "grad_norm": 1.5392467975616455, + "learning_rate": 1.9175630471136952e-05, + "loss": 0.3047, + "step": 455 + }, + { + "epoch": 1.3372093023255813, + "grad_norm": 1.2908953428268433, + "learning_rate": 1.894417952403102e-05, + "loss": 0.3051, + "step": 460 + }, + { + "epoch": 1.3517441860465116, + "grad_norm": 1.6663968563079834, + "learning_rate": 1.8711715284073715e-05, + "loss": 0.3449, + "step": 465 + }, + { + "epoch": 1.3662790697674418, + "grad_norm": 1.211861252784729, + "learning_rate": 1.847829747328102e-05, + "loss": 0.3234, + "step": 470 + }, + { + "epoch": 1.380813953488372, + "grad_norm": 1.3294278383255005, + "learning_rate": 1.824398605864925e-05, + "loss": 0.3043, + "step": 475 + }, + { + "epoch": 1.3953488372093024, + "grad_norm": 1.3736189603805542, + "learning_rate": 1.8008841236749092e-05, + "loss": 0.3162, + "step": 480 + }, + { + "epoch": 1.4098837209302326, + "grad_norm": 1.5072505474090576, + "learning_rate": 1.7772923418260533e-05, + "loss": 0.3058, + "step": 485 + }, + { + "epoch": 1.4244186046511627, + "grad_norm": 1.2938207387924194, + "learning_rate": 1.753629321245288e-05, + "loss": 0.2621, + "step": 490 + }, + { + "epoch": 1.4389534883720931, + "grad_norm": 1.0965425968170166, + "learning_rate": 1.7299011411613738e-05, + "loss": 0.2824, + "step": 495 + }, + { + "epoch": 1.4534883720930232, + "grad_norm": 1.2044687271118164, + "learning_rate": 1.7061138975430944e-05, + "loss": 0.2832, + "step": 500 + }, + { + "epoch": 1.4680232558139534, + "grad_norm": 1.3761241436004639, + "learning_rate": 1.682273701533151e-05, + "loss": 0.3103, + "step": 505 + }, + { + "epoch": 1.4825581395348837, + "grad_norm": 1.3916083574295044, + "learning_rate": 1.6583866778781593e-05, + "loss": 0.2444, + "step": 510 + }, + { + "epoch": 1.497093023255814, + "grad_norm": 1.406844973564148, + "learning_rate": 1.6344589633551502e-05, + "loss": 0.2707, + "step": 515 + }, + { + "epoch": 1.5116279069767442, + "grad_norm": 1.2612719535827637, + "learning_rate": 1.6104967051949824e-05, + "loss": 0.2327, + "step": 520 + }, + { + "epoch": 1.5261627906976745, + "grad_norm": 1.56605064868927, + "learning_rate": 1.586506059503062e-05, + "loss": 0.2639, + "step": 525 + }, + { + "epoch": 1.5406976744186047, + "grad_norm": 1.3263754844665527, + "learning_rate": 1.5624931896777923e-05, + "loss": 0.2533, + "step": 530 + }, + { + "epoch": 1.5552325581395348, + "grad_norm": 1.1619617938995361, + "learning_rate": 1.538464264827143e-05, + "loss": 0.2383, + "step": 535 + }, + { + "epoch": 1.5697674418604652, + "grad_norm": 1.4183673858642578, + "learning_rate": 1.5144254581837549e-05, + "loss": 0.2431, + "step": 540 + }, + { + "epoch": 1.5843023255813953, + "grad_norm": 1.4761496782302856, + "learning_rate": 1.4903829455189833e-05, + "loss": 0.2809, + "step": 545 + }, + { + "epoch": 1.5988372093023255, + "grad_norm": 1.2451066970825195, + "learning_rate": 1.4663429035562928e-05, + "loss": 0.2088, + "step": 550 + }, + { + "epoch": 1.6133720930232558, + "grad_norm": 1.259175419807434, + "learning_rate": 1.4423115083844024e-05, + "loss": 0.248, + "step": 555 + }, + { + "epoch": 1.627906976744186, + "grad_norm": 1.2071088552474976, + "learning_rate": 1.4182949338705999e-05, + "loss": 0.2269, + "step": 560 + }, + { + "epoch": 1.6424418604651163, + "grad_norm": 1.2664886713027954, + "learning_rate": 1.394299350074619e-05, + "loss": 0.2282, + "step": 565 + }, + { + "epoch": 1.6569767441860463, + "grad_norm": 1.2288563251495361, + "learning_rate": 1.3703309216635049e-05, + "loss": 0.2449, + "step": 570 + }, + { + "epoch": 1.6715116279069768, + "grad_norm": 1.6389787197113037, + "learning_rate": 1.346395806327853e-05, + "loss": 0.2544, + "step": 575 + }, + { + "epoch": 1.6860465116279069, + "grad_norm": 1.471967101097107, + "learning_rate": 1.3225001531998518e-05, + "loss": 0.1962, + "step": 580 + }, + { + "epoch": 1.7005813953488373, + "grad_norm": 1.152327299118042, + "learning_rate": 1.2986501012735174e-05, + "loss": 0.1953, + "step": 585 + }, + { + "epoch": 1.7151162790697674, + "grad_norm": 1.2725104093551636, + "learning_rate": 1.2748517778275314e-05, + "loss": 0.2111, + "step": 590 + }, + { + "epoch": 1.7296511627906976, + "grad_norm": 1.324650526046753, + "learning_rate": 1.2511112968510988e-05, + "loss": 0.2301, + "step": 595 + }, + { + "epoch": 1.744186046511628, + "grad_norm": 1.100905418395996, + "learning_rate": 1.2274347574732037e-05, + "loss": 0.1883, + "step": 600 + }, + { + "epoch": 1.7587209302325582, + "grad_norm": 1.527988314628601, + "learning_rate": 1.2038282423956994e-05, + "loss": 0.2257, + "step": 605 + }, + { + "epoch": 1.7732558139534884, + "grad_norm": 1.1394156217575073, + "learning_rate": 1.1802978163306072e-05, + "loss": 0.224, + "step": 610 + }, + { + "epoch": 1.7877906976744184, + "grad_norm": 1.648760199546814, + "learning_rate": 1.1568495244420421e-05, + "loss": 0.2163, + "step": 615 + }, + { + "epoch": 1.802325581395349, + "grad_norm": 1.249718427658081, + "learning_rate": 1.1334893907931587e-05, + "loss": 0.1833, + "step": 620 + }, + { + "epoch": 1.816860465116279, + "grad_norm": 1.3375345468521118, + "learning_rate": 1.1102234167985209e-05, + "loss": 0.1831, + "step": 625 + }, + { + "epoch": 1.8313953488372094, + "grad_norm": 1.473465919494629, + "learning_rate": 1.087057579682284e-05, + "loss": 0.1904, + "step": 630 + }, + { + "epoch": 1.8459302325581395, + "grad_norm": 1.3396551609039307, + "learning_rate": 1.0639978309425997e-05, + "loss": 0.2149, + "step": 635 + }, + { + "epoch": 1.8604651162790697, + "grad_norm": 1.3565618991851807, + "learning_rate": 1.0410500948226247e-05, + "loss": 0.1773, + "step": 640 + }, + { + "epoch": 1.875, + "grad_norm": 1.2447470426559448, + "learning_rate": 1.0182202667885317e-05, + "loss": 0.1797, + "step": 645 + }, + { + "epoch": 1.8895348837209303, + "grad_norm": 1.197994351387024, + "learning_rate": 9.955142120149176e-06, + "loss": 0.2075, + "step": 650 + }, + { + "epoch": 1.9040697674418605, + "grad_norm": 1.2260866165161133, + "learning_rate": 9.729377638779859e-06, + "loss": 0.1637, + "step": 655 + }, + { + "epoch": 1.9186046511627906, + "grad_norm": 1.3982919454574585, + "learning_rate": 9.5049672245691e-06, + "loss": 0.1754, + "step": 660 + }, + { + "epoch": 1.933139534883721, + "grad_norm": 1.246808648109436, + "learning_rate": 9.281968530437374e-06, + "loss": 0.1641, + "step": 665 + }, + { + "epoch": 1.947674418604651, + "grad_norm": 1.4287950992584229, + "learning_rate": 9.060438846622436e-06, + "loss": 0.1983, + "step": 670 + }, + { + "epoch": 1.9622093023255816, + "grad_norm": 1.0923234224319458, + "learning_rate": 8.840435085960932e-06, + "loss": 0.1529, + "step": 675 + }, + { + "epoch": 1.9767441860465116, + "grad_norm": 1.4138095378875732, + "learning_rate": 8.62201376926703e-06, + "loss": 0.1659, + "step": 680 + }, + { + "epoch": 1.9912790697674418, + "grad_norm": 1.1389576196670532, + "learning_rate": 8.405231010811771e-06, + "loss": 0.1863, + "step": 685 + }, + { + "epoch": 2.005813953488372, + "grad_norm": 1.489659070968628, + "learning_rate": 8.190142503906798e-06, + "loss": 0.1418, + "step": 690 + }, + { + "epoch": 2.020348837209302, + "grad_norm": 1.0120278596878052, + "learning_rate": 7.976803506596316e-06, + "loss": 0.1113, + "step": 695 + }, + { + "epoch": 2.0348837209302326, + "grad_norm": 1.1574699878692627, + "learning_rate": 7.765268827460797e-06, + "loss": 0.117, + "step": 700 + }, + { + "epoch": 2.0494186046511627, + "grad_norm": 1.4453458786010742, + "learning_rate": 7.555592811536254e-06, + "loss": 0.1115, + "step": 705 + }, + { + "epoch": 2.063953488372093, + "grad_norm": 1.2999567985534668, + "learning_rate": 7.347829326352459e-06, + "loss": 0.1288, + "step": 710 + }, + { + "epoch": 2.078488372093023, + "grad_norm": 1.3467832803726196, + "learning_rate": 7.142031748094016e-06, + "loss": 0.1203, + "step": 715 + }, + { + "epoch": 2.0930232558139537, + "grad_norm": 1.0434598922729492, + "learning_rate": 6.93825294788751e-06, + "loss": 0.152, + "step": 720 + }, + { + "epoch": 2.1075581395348837, + "grad_norm": 1.363543152809143, + "learning_rate": 6.736545278218464e-06, + "loss": 0.1328, + "step": 725 + }, + { + "epoch": 2.1220930232558137, + "grad_norm": 0.953025758266449, + "learning_rate": 6.536960559481605e-06, + "loss": 0.1194, + "step": 730 + }, + { + "epoch": 2.136627906976744, + "grad_norm": 1.2571179866790771, + "learning_rate": 6.339550066667711e-06, + "loss": 0.1394, + "step": 735 + }, + { + "epoch": 2.1511627906976742, + "grad_norm": 0.9825882315635681, + "learning_rate": 6.144364516190662e-06, + "loss": 0.1107, + "step": 740 + }, + { + "epoch": 2.1656976744186047, + "grad_norm": 0.9452372193336487, + "learning_rate": 5.951454052857953e-06, + "loss": 0.1118, + "step": 745 + }, + { + "epoch": 2.1802325581395348, + "grad_norm": 1.327471137046814, + "learning_rate": 5.760868236988102e-06, + "loss": 0.1275, + "step": 750 + }, + { + "epoch": 2.1947674418604652, + "grad_norm": 0.9882895946502686, + "learning_rate": 5.572656031678146e-06, + "loss": 0.1101, + "step": 755 + }, + { + "epoch": 2.2093023255813953, + "grad_norm": 1.1499403715133667, + "learning_rate": 5.386865790224638e-06, + "loss": 0.1177, + "step": 760 + }, + { + "epoch": 2.2238372093023258, + "grad_norm": 1.325013279914856, + "learning_rate": 5.203545243701269e-06, + "loss": 0.124, + "step": 765 + }, + { + "epoch": 2.238372093023256, + "grad_norm": 0.8665274977684021, + "learning_rate": 5.022741488696368e-06, + "loss": 0.1227, + "step": 770 + }, + { + "epoch": 2.2529069767441863, + "grad_norm": 1.0002868175506592, + "learning_rate": 4.8445009752133615e-06, + "loss": 0.1165, + "step": 775 + }, + { + "epoch": 2.2674418604651163, + "grad_norm": 1.0755016803741455, + "learning_rate": 4.668869494737406e-06, + "loss": 0.1222, + "step": 780 + }, + { + "epoch": 2.2819767441860463, + "grad_norm": 0.9953200221061707, + "learning_rate": 4.49589216847118e-06, + "loss": 0.1059, + "step": 785 + }, + { + "epoch": 2.296511627906977, + "grad_norm": 0.9197989702224731, + "learning_rate": 4.325613435742814e-06, + "loss": 0.1062, + "step": 790 + }, + { + "epoch": 2.311046511627907, + "grad_norm": 0.9167353510856628, + "learning_rate": 4.158077042589129e-06, + "loss": 0.0941, + "step": 795 + }, + { + "epoch": 2.3255813953488373, + "grad_norm": 1.004165768623352, + "learning_rate": 3.9933260305168436e-06, + "loss": 0.1122, + "step": 800 + }, + { + "epoch": 2.3401162790697674, + "grad_norm": 0.8908636569976807, + "learning_rate": 3.831402725444897e-06, + "loss": 0.1361, + "step": 805 + }, + { + "epoch": 2.354651162790698, + "grad_norm": 0.999954879283905, + "learning_rate": 3.6723487268305327e-06, + "loss": 0.1026, + "step": 810 + }, + { + "epoch": 2.369186046511628, + "grad_norm": 0.9715881943702698, + "learning_rate": 3.5162048969820787e-06, + "loss": 0.1194, + "step": 815 + }, + { + "epoch": 2.383720930232558, + "grad_norm": 0.9164047837257385, + "learning_rate": 3.3630113505610523e-06, + "loss": 0.0999, + "step": 820 + }, + { + "epoch": 2.3982558139534884, + "grad_norm": 0.7661182880401611, + "learning_rate": 3.212807444276365e-06, + "loss": 0.097, + "step": 825 + }, + { + "epoch": 2.4127906976744184, + "grad_norm": 0.8208587765693665, + "learning_rate": 3.065631766773286e-06, + "loss": 0.0918, + "step": 830 + }, + { + "epoch": 2.427325581395349, + "grad_norm": 0.9974421262741089, + "learning_rate": 2.921522128719658e-06, + "loss": 0.1024, + "step": 835 + }, + { + "epoch": 2.441860465116279, + "grad_norm": 0.9220830798149109, + "learning_rate": 2.780515553092038e-06, + "loss": 0.0949, + "step": 840 + }, + { + "epoch": 2.4563953488372094, + "grad_norm": 1.000914454460144, + "learning_rate": 2.642648265664175e-06, + "loss": 0.1158, + "step": 845 + }, + { + "epoch": 2.4709302325581395, + "grad_norm": 1.0283595323562622, + "learning_rate": 2.5079556857003256e-06, + "loss": 0.1014, + "step": 850 + }, + { + "epoch": 2.4854651162790695, + "grad_norm": 0.7097103595733643, + "learning_rate": 2.3764724168557027e-06, + "loss": 0.0878, + "step": 855 + }, + { + "epoch": 2.5, + "grad_norm": 0.8873803615570068, + "learning_rate": 2.248232238286562e-06, + "loss": 0.1006, + "step": 860 + }, + { + "epoch": 2.5145348837209305, + "grad_norm": 0.7884091138839722, + "learning_rate": 2.1232680959720085e-06, + "loss": 0.1001, + "step": 865 + }, + { + "epoch": 2.5290697674418605, + "grad_norm": 0.7166872024536133, + "learning_rate": 2.00161209424992e-06, + "loss": 0.1022, + "step": 870 + }, + { + "epoch": 2.5436046511627906, + "grad_norm": 0.7053694128990173, + "learning_rate": 1.8832954875690656e-06, + "loss": 0.0935, + "step": 875 + }, + { + "epoch": 2.558139534883721, + "grad_norm": 0.9858315587043762, + "learning_rate": 1.768348672459575e-06, + "loss": 0.1089, + "step": 880 + }, + { + "epoch": 2.572674418604651, + "grad_norm": 0.8280432820320129, + "learning_rate": 1.6568011797238247e-06, + "loss": 0.1087, + "step": 885 + }, + { + "epoch": 2.5872093023255816, + "grad_norm": 0.8028408885002136, + "learning_rate": 1.5486816668497046e-06, + "loss": 0.1052, + "step": 890 + }, + { + "epoch": 2.6017441860465116, + "grad_norm": 0.8483460545539856, + "learning_rate": 1.4440179106482987e-06, + "loss": 0.1095, + "step": 895 + }, + { + "epoch": 2.616279069767442, + "grad_norm": 0.986524760723114, + "learning_rate": 1.342836800117762e-06, + "loss": 0.0923, + "step": 900 + }, + { + "epoch": 2.630813953488372, + "grad_norm": 0.7955085039138794, + "learning_rate": 1.2451643295353238e-06, + "loss": 0.0887, + "step": 905 + }, + { + "epoch": 2.645348837209302, + "grad_norm": 0.8616383075714111, + "learning_rate": 1.151025591779139e-06, + "loss": 0.1115, + "step": 910 + }, + { + "epoch": 2.6598837209302326, + "grad_norm": 0.8315482139587402, + "learning_rate": 1.0604447718817212e-06, + "loss": 0.0947, + "step": 915 + }, + { + "epoch": 2.6744186046511627, + "grad_norm": 0.7204957008361816, + "learning_rate": 9.73445140816585e-07, + "loss": 0.083, + "step": 920 + }, + { + "epoch": 2.688953488372093, + "grad_norm": 0.823000431060791, + "learning_rate": 8.900490495197627e-07, + "loss": 0.1099, + "step": 925 + }, + { + "epoch": 2.703488372093023, + "grad_norm": 1.0032994747161865, + "learning_rate": 8.102779231476482e-07, + "loss": 0.0977, + "step": 930 + }, + { + "epoch": 2.7180232558139537, + "grad_norm": 0.6775994300842285, + "learning_rate": 7.341522555726971e-07, + "loss": 0.0948, + "step": 935 + }, + { + "epoch": 2.7325581395348837, + "grad_norm": 0.801041841506958, + "learning_rate": 6.616916041183718e-07, + "loss": 0.1044, + "step": 940 + }, + { + "epoch": 2.7470930232558137, + "grad_norm": 0.7917124629020691, + "learning_rate": 5.929145845347106e-07, + "loss": 0.0921, + "step": 945 + }, + { + "epoch": 2.761627906976744, + "grad_norm": 0.7432550191879272, + "learning_rate": 5.278388662157846e-07, + "loss": 0.0829, + "step": 950 + }, + { + "epoch": 2.7761627906976747, + "grad_norm": 0.779715359210968, + "learning_rate": 4.6648116766027095e-07, + "loss": 0.0846, + "step": 955 + }, + { + "epoch": 2.7906976744186047, + "grad_norm": 0.8047905564308167, + "learning_rate": 4.0885725217634337e-07, + "loss": 0.0894, + "step": 960 + }, + { + "epoch": 2.8052325581395348, + "grad_norm": 0.9380701780319214, + "learning_rate": 3.549819238319385e-07, + "loss": 0.082, + "step": 965 + }, + { + "epoch": 2.8197674418604652, + "grad_norm": 0.6520321369171143, + "learning_rate": 3.0486902365146787e-07, + "loss": 0.0878, + "step": 970 + }, + { + "epoch": 2.8343023255813953, + "grad_norm": 0.9061381220817566, + "learning_rate": 2.585314260599425e-07, + "loss": 0.0986, + "step": 975 + }, + { + "epoch": 2.8488372093023253, + "grad_norm": 0.8139726519584656, + "learning_rate": 2.1598103557542715e-07, + "loss": 0.079, + "step": 980 + }, + { + "epoch": 2.863372093023256, + "grad_norm": 0.7030637860298157, + "learning_rate": 1.7722878375066475e-07, + "loss": 0.0987, + "step": 985 + }, + { + "epoch": 2.8779069767441863, + "grad_norm": 0.961593747138977, + "learning_rate": 1.4228462636467487e-07, + "loss": 0.0992, + "step": 990 + }, + { + "epoch": 2.8924418604651163, + "grad_norm": 0.6773058772087097, + "learning_rate": 1.111575408650245e-07, + "loss": 0.0958, + "step": 995 + }, + { + "epoch": 2.9069767441860463, + "grad_norm": 0.7029597759246826, + "learning_rate": 8.385552406145302e-08, + "loss": 0.0809, + "step": 1000 + }, + { + "epoch": 2.921511627906977, + "grad_norm": 0.9074150323867798, + "learning_rate": 6.038559007141397e-08, + "loss": 0.0891, + "step": 1005 + }, + { + "epoch": 2.936046511627907, + "grad_norm": 0.7209482789039612, + "learning_rate": 4.075376851810308e-08, + "loss": 0.0883, + "step": 1010 + }, + { + "epoch": 2.9505813953488373, + "grad_norm": 0.8554053902626038, + "learning_rate": 2.4965102981387432e-08, + "loss": 0.0977, + "step": 1015 + }, + { + "epoch": 2.9651162790697674, + "grad_norm": 0.6894388198852539, + "learning_rate": 1.3023649702066931e-08, + "loss": 0.0796, + "step": 1020 + }, + { + "epoch": 2.979651162790698, + "grad_norm": 0.7438033223152161, + "learning_rate": 4.9324765398028884e-09, + "loss": 0.1003, + "step": 1025 + }, + { + "epoch": 2.994186046511628, + "grad_norm": 0.8008667826652527, + "learning_rate": 6.936621849451541e-10, + "loss": 0.085, + "step": 1030 + }, + { + "epoch": 3.0, + "step": 1032, + "total_flos": 1.2911580544436797e+18, + "train_loss": 0.4547977649801692, + "train_runtime": 560.3042, + "train_samples_per_second": 58.859, + "train_steps_per_second": 1.842 + } + ], + "logging_steps": 5, + "max_steps": 1032, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2911580544436797e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/training_args.bin b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..551da2f930447e5e15c577ccac17dbe112d4ff55 --- /dev/null +++ b/hotpotqa_train_knowledge_50_instruct/9_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43f85a07e7e2b7e8da3ac8b30e92147e6612a0c342a6684a1c07d80ddfbdf4dc +size 8273