diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..9317e778def9dc143a98020f8f5bbb59ececc30c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,27 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text +barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_config.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9449a418561c1f39a702f932148080a7d3027637 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj", + "k_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_model.safetensors b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c167d3f80b1164a195a27de5d807af669928411c --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f064ccf2bd3f7207c6ff3716f473cafa10dacdf391461a92436aa1058fe9a547 +size 335605144 diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/all_results.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7899dcc4e4e215f50c25a3ca9411726d4ea7e37 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 2.8299985921507328e+17, + "train_loss": 4.945504999997323, + "train_runtime": 195.3785, + "train_samples": 1814, + "train_samples_per_second": 9.285, + "train_steps_per_second": 0.583 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/chat_template.jinja b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/config.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/generation_config.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..86546ab87e4eb79c96f00ffe17ad89ca00e2ecc1 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.52.4" +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/special_tokens_map.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer_config.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/train_results.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7899dcc4e4e215f50c25a3ca9411726d4ea7e37 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 2.8299985921507328e+17, + "train_loss": 4.945504999997323, + "train_runtime": 195.3785, + "train_samples": 1814, + "train_samples_per_second": 9.285, + "train_steps_per_second": 0.583 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/trainer_state.json b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..da3d0e10c1f2979a6aa66c4a40857d7875ba7296 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/trainer_state.json @@ -0,0 +1,197 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.04405286343612335, + "grad_norm": 29.493370056152344, + "learning_rate": 9.999999999999999e-06, + "loss": 9.444, + "step": 5 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 16.666152954101562, + "learning_rate": 2.25e-05, + "loss": 8.0504, + "step": 10 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 5.740612506866455, + "learning_rate": 2.9971549931055665e-05, + "loss": 5.841, + "step": 15 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 3.649662733078003, + "learning_rate": 2.9652724766394012e-05, + "loss": 5.2463, + "step": 20 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 3.4300012588500977, + "learning_rate": 2.8987083441065335e-05, + "loss": 4.965, + "step": 25 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 3.1103951930999756, + "learning_rate": 2.7990381056766583e-05, + "loss": 4.8822, + "step": 30 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 3.4323017597198486, + "learning_rate": 2.6686208617885057e-05, + "loss": 4.6976, + "step": 35 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 2.8730552196502686, + "learning_rate": 2.510543465469836e-05, + "loss": 4.5988, + "step": 40 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 2.7638320922851562, + "learning_rate": 2.3285474594407588e-05, + "loss": 4.6746, + "step": 45 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 2.726217746734619, + "learning_rate": 2.1269405173301752e-05, + "loss": 4.5865, + "step": 50 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 2.6741786003112793, + "learning_rate": 1.9104944851081247e-05, + "loss": 4.4796, + "step": 55 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 2.884930372238159, + "learning_rate": 1.6843324359970714e-05, + "loss": 4.4189, + "step": 60 + }, + { + "epoch": 0.5726872246696035, + "grad_norm": 3.0847418308258057, + "learning_rate": 1.4538074121657448e-05, + "loss": 4.4774, + "step": 65 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 2.759352445602417, + "learning_rate": 1.2243757232751444e-05, + "loss": 4.3831, + "step": 70 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 3.077382802963257, + "learning_rate": 1.0014678007805108e-05, + "loss": 4.3349, + "step": 75 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 2.7511823177337646, + "learning_rate": 7.903596647459851e-06, + "loss": 4.3551, + "step": 80 + }, + { + "epoch": 0.748898678414097, + "grad_norm": 3.04829478263855, + "learning_rate": 5.960480454311155e-06, + "loss": 4.4464, + "step": 85 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 3.0664618015289307, + "learning_rate": 4.231321154033372e-06, + "loss": 4.4373, + "step": 90 + }, + { + "epoch": 0.8370044052863436, + "grad_norm": 2.962254047393799, + "learning_rate": 2.757046314656676e-06, + "loss": 4.3569, + "step": 95 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 3.103513240814209, + "learning_rate": 1.5725506296740666e-06, + "loss": 4.323, + "step": 100 + }, + { + "epoch": 0.9251101321585903, + "grad_norm": 3.0217154026031494, + "learning_rate": 7.058699935926527e-07, + "loss": 4.4199, + "step": 105 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 2.967168092727661, + "learning_rate": 1.7751791878110933e-07, + "loss": 4.2579, + "step": 110 + }, + { + "epoch": 1.0, + "step": 114, + "total_flos": 2.8299985921507328e+17, + "train_loss": 4.945504999997323, + "train_runtime": 195.3785, + "train_samples_per_second": 9.285, + "train_steps_per_second": 0.583 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.8299985921507328e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/training_args.bin b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2afa3f200dfb872462f08a570b9b3a22921a112f --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch/64_e1_3.0e-05/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e668f871880ed8d055c6aa307e73a2061078b1a5bd2b98ec1ab126fadf8381 +size 8081 diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/adapter_config.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a6c446651914f9992b2514430555c12b70483b2 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "q_proj", + "gate_proj", + "v_proj", + "o_proj", + "down_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/adapter_model.safetensors b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78d0bdcfa744b73d015817bae36d0fc765749ffe --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c09b2eeabec4798cfc6180d657b5080666608a44957f89d0f0cd23de99d31e6b +size 335605144 diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/all_results.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1902259b8627ea0cb4a3f489270f36c160a42ebf --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 4.044527795919913e+17, + "train_loss": 3.1783380441825484, + "train_runtime": 284.2876, + "train_samples": 2851, + "train_samples_per_second": 10.029, + "train_steps_per_second": 0.63 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/chat_template.jinja b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/config.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/generation_config.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..86546ab87e4eb79c96f00ffe17ad89ca00e2ecc1 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.52.4" +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/special_tokens_map.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer_config.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/train_results.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1902259b8627ea0cb4a3f489270f36c160a42ebf --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 4.044527795919913e+17, + "train_loss": 3.1783380441825484, + "train_runtime": 284.2876, + "train_samples": 2851, + "train_samples_per_second": 10.029, + "train_steps_per_second": 0.63 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/trainer_state.json b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..50efb788bc817157fe03b220ea12e90f2f097ba0 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/trainer_state.json @@ -0,0 +1,288 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 179, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.028050490883590462, + "grad_norm": 1379.8271484375, + "learning_rate": 6.666666666666667e-06, + "loss": 55.5332, + "step": 5 + }, + { + "epoch": 0.056100981767180924, + "grad_norm": 108.1146240234375, + "learning_rate": 1.5e-05, + "loss": 12.8112, + "step": 10 + }, + { + "epoch": 0.08415147265077139, + "grad_norm": 20.724315643310547, + "learning_rate": 2.3333333333333336e-05, + "loss": 3.0069, + "step": 15 + }, + { + "epoch": 0.11220196353436185, + "grad_norm": 21.16265296936035, + "learning_rate": 2.9997144412471088e-05, + "loss": 2.495, + "step": 20 + }, + { + "epoch": 0.1402524544179523, + "grad_norm": 28.13153839111328, + "learning_rate": 2.9897312963925566e-05, + "loss": 2.2797, + "step": 25 + }, + { + "epoch": 0.16830294530154277, + "grad_norm": 28.368040084838867, + "learning_rate": 2.9655787530315307e-05, + "loss": 2.3395, + "step": 30 + }, + { + "epoch": 0.19635343618513323, + "grad_norm": 20.231536865234375, + "learning_rate": 2.927486535150267e-05, + "loss": 2.0189, + "step": 35 + }, + { + "epoch": 0.2244039270687237, + "grad_norm": 26.563016891479492, + "learning_rate": 2.8758169522581795e-05, + "loss": 2.1708, + "step": 40 + }, + { + "epoch": 0.25245441795231416, + "grad_norm": 26.28299903869629, + "learning_rate": 2.8110614533249145e-05, + "loss": 1.9349, + "step": 45 + }, + { + "epoch": 0.2805049088359046, + "grad_norm": 27.670913696289062, + "learning_rate": 2.7338359524230917e-05, + "loss": 1.8713, + "step": 50 + }, + { + "epoch": 0.3085553997194951, + "grad_norm": 29.002378463745117, + "learning_rate": 2.6448749705363208e-05, + "loss": 1.7072, + "step": 55 + }, + { + "epoch": 0.33660589060308554, + "grad_norm": 24.703224182128906, + "learning_rate": 2.54502464925197e-05, + "loss": 1.8121, + "step": 60 + }, + { + "epoch": 0.364656381486676, + "grad_norm": 29.92906379699707, + "learning_rate": 2.4352347027881003e-05, + "loss": 1.654, + "step": 65 + }, + { + "epoch": 0.39270687237026647, + "grad_norm": 24.801559448242188, + "learning_rate": 2.3165493849018966e-05, + "loss": 1.5359, + "step": 70 + }, + { + "epoch": 0.42075736325385693, + "grad_norm": 34.28458023071289, + "learning_rate": 2.1900975565967284e-05, + "loss": 1.6406, + "step": 75 + }, + { + "epoch": 0.4488078541374474, + "grad_norm": 36.270511627197266, + "learning_rate": 2.0570819490976602e-05, + "loss": 1.3171, + "step": 80 + }, + { + "epoch": 0.47685834502103785, + "grad_norm": 33.70671081542969, + "learning_rate": 1.918767724219309e-05, + "loss": 1.3344, + "step": 85 + }, + { + "epoch": 0.5049088359046283, + "grad_norm": 40.40463638305664, + "learning_rate": 1.776470440932719e-05, + "loss": 1.1337, + "step": 90 + }, + { + "epoch": 0.5329593267882188, + "grad_norm": 30.696088790893555, + "learning_rate": 1.6315435425858007e-05, + "loss": 0.8504, + "step": 95 + }, + { + "epoch": 0.5610098176718092, + "grad_norm": 39.22968292236328, + "learning_rate": 1.485365483791113e-05, + "loss": 1.0301, + "step": 100 + }, + { + "epoch": 0.5890603085553997, + "grad_norm": 33.72607421875, + "learning_rate": 1.339326619422034e-05, + "loss": 0.983, + "step": 105 + }, + { + "epoch": 0.6171107994389902, + "grad_norm": 26.01291847229004, + "learning_rate": 1.1948159804210497e-05, + "loss": 1.0315, + "step": 110 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 22.80808448791504, + "learning_rate": 1.0532080622004453e-05, + "loss": 1.0307, + "step": 115 + }, + { + "epoch": 0.6732117812061711, + "grad_norm": 32.01971435546875, + "learning_rate": 9.158497512959335e-06, + "loss": 0.826, + "step": 120 + }, + { + "epoch": 0.7012622720897616, + "grad_norm": 42.26045608520508, + "learning_rate": 7.840475146187582e-06, + "loss": 0.8369, + "step": 125 + }, + { + "epoch": 0.729312762973352, + "grad_norm": 44.010467529296875, + "learning_rate": 6.590549731541573e-06, + "loss": 0.9926, + "step": 130 + }, + { + "epoch": 0.7573632538569425, + "grad_norm": 32.69865417480469, + "learning_rate": 5.4206097829743856e-06, + "loss": 0.747, + "step": 135 + }, + { + "epoch": 0.7854137447405329, + "grad_norm": 33.29957580566406, + "learning_rate": 4.341783042381501e-06, + "loss": 1.1197, + "step": 140 + }, + { + "epoch": 0.8134642356241234, + "grad_norm": 31.13404655456543, + "learning_rate": 3.3643306394337016e-06, + "loss": 0.6578, + "step": 145 + }, + { + "epoch": 0.8415147265077139, + "grad_norm": 20.46255111694336, + "learning_rate": 2.4975494940869687e-06, + "loss": 0.7867, + "step": 150 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 21.053003311157227, + "learning_rate": 1.7496838900561229e-06, + "loss": 0.5904, + "step": 155 + }, + { + "epoch": 0.8976157082748948, + "grad_norm": 47.3381462097168, + "learning_rate": 1.127847060310403e-06, + "loss": 0.7869, + "step": 160 + }, + { + "epoch": 0.9256661991584852, + "grad_norm": 57.58197021484375, + "learning_rate": 6.3795353042096e-07, + "loss": 0.7933, + "step": 165 + }, + { + "epoch": 0.9537166900420757, + "grad_norm": 36.09605407714844, + "learning_rate": 2.8466286326840676e-07, + "loss": 0.7767, + "step": 170 + }, + { + "epoch": 0.9817671809256662, + "grad_norm": 34.18437194824219, + "learning_rate": 7.133534017569998e-08, + "loss": 0.927, + "step": 175 + }, + { + "epoch": 1.0, + "step": 179, + "total_flos": 4.044527795919913e+17, + "train_loss": 3.1783380441825484, + "train_runtime": 284.2876, + "train_samples_per_second": 10.029, + "train_steps_per_second": 0.63 + } + ], + "logging_steps": 5, + "max_steps": 179, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.044527795919913e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/training_args.bin b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..52ba865fecf4f1e1ee65473113d6fe34ab6d3f3f --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_base_3epoch_no_reasoning/64_e1_3.0e-05/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f7ee67b52b63688e98c46d05d4476523a470af92f31a91bef6b20e5ee214ba4 +size 8081 diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1ec06d20e58c431eb490d1c22ef2a9a3b05c442e --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "k_proj", + "up_proj", + "down_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_model.safetensors b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66f91c4259c1a07870204c50e2aca55f134db01e --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09c0d2da0dbfa5b9f90cbe804e188a1ae4a6420999a7c5c7301c3f2220454aa9 +size 335605144 diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/all_results.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..925f543827dd024e10b6da810a5a2d4322047292 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 2.8299985921507328e+17, + "train_loss": 4.773955010531242, + "train_runtime": 196.1473, + "train_samples": 1814, + "train_samples_per_second": 9.248, + "train_steps_per_second": 0.581 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/chat_template.jinja b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/generation_config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..86546ab87e4eb79c96f00ffe17ad89ca00e2ecc1 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.52.4" +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/special_tokens_map.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer_config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/train_results.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..925f543827dd024e10b6da810a5a2d4322047292 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 2.8299985921507328e+17, + "train_loss": 4.773955010531242, + "train_runtime": 196.1473, + "train_samples": 1814, + "train_samples_per_second": 9.248, + "train_steps_per_second": 0.581 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/trainer_state.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ec72a46b5e697b6bd994be1ac321d1e1a9c7ba6 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/trainer_state.json @@ -0,0 +1,197 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 114, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.04405286343612335, + "grad_norm": 24.819272994995117, + "learning_rate": 9.999999999999999e-06, + "loss": 7.9126, + "step": 5 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 8.255205154418945, + "learning_rate": 2.25e-05, + "loss": 6.7871, + "step": 10 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 4.8888444900512695, + "learning_rate": 2.9971549931055665e-05, + "loss": 5.5916, + "step": 15 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 3.5335583686828613, + "learning_rate": 2.9652724766394012e-05, + "loss": 5.0805, + "step": 20 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 3.101619005203247, + "learning_rate": 2.8987083441065335e-05, + "loss": 4.8778, + "step": 25 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 2.9929182529449463, + "learning_rate": 2.7990381056766583e-05, + "loss": 4.8024, + "step": 30 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 3.2556817531585693, + "learning_rate": 2.6686208617885057e-05, + "loss": 4.6406, + "step": 35 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 2.803607940673828, + "learning_rate": 2.510543465469836e-05, + "loss": 4.5586, + "step": 40 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 2.706911563873291, + "learning_rate": 2.3285474594407588e-05, + "loss": 4.6298, + "step": 45 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 2.654039144515991, + "learning_rate": 2.1269405173301752e-05, + "loss": 4.5491, + "step": 50 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 2.655120372772217, + "learning_rate": 1.9104944851081247e-05, + "loss": 4.4531, + "step": 55 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 2.7391159534454346, + "learning_rate": 1.6843324359970714e-05, + "loss": 4.3923, + "step": 60 + }, + { + "epoch": 0.5726872246696035, + "grad_norm": 3.060450792312622, + "learning_rate": 1.4538074121657448e-05, + "loss": 4.4517, + "step": 65 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 2.7751266956329346, + "learning_rate": 1.2243757232751444e-05, + "loss": 4.35, + "step": 70 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 3.063387632369995, + "learning_rate": 1.0014678007805108e-05, + "loss": 4.3087, + "step": 75 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 2.754143238067627, + "learning_rate": 7.903596647459851e-06, + "loss": 4.3276, + "step": 80 + }, + { + "epoch": 0.748898678414097, + "grad_norm": 3.0200438499450684, + "learning_rate": 5.960480454311155e-06, + "loss": 4.4189, + "step": 85 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 3.024763345718384, + "learning_rate": 4.231321154033372e-06, + "loss": 4.4141, + "step": 90 + }, + { + "epoch": 0.8370044052863436, + "grad_norm": 3.185255289077759, + "learning_rate": 2.757046314656676e-06, + "loss": 4.3379, + "step": 95 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 3.0675833225250244, + "learning_rate": 1.5725506296740666e-06, + "loss": 4.2864, + "step": 100 + }, + { + "epoch": 0.9251101321585903, + "grad_norm": 2.9702301025390625, + "learning_rate": 7.058699935926527e-07, + "loss": 4.3944, + "step": 105 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 2.887024402618408, + "learning_rate": 1.7751791878110933e-07, + "loss": 4.223, + "step": 110 + }, + { + "epoch": 1.0, + "step": 114, + "total_flos": 2.8299985921507328e+17, + "train_loss": 4.773955010531242, + "train_runtime": 196.1473, + "train_samples_per_second": 9.248, + "train_steps_per_second": 0.581 + } + ], + "logging_steps": 5, + "max_steps": 114, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.8299985921507328e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/training_args.bin b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b364409972067bd17f8a927fc61af6b4aebd34e --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch/64_e1_3.0e-05/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53a99fc8574d512ca279c8fc1647c1310502796f4bef5be9f683c1631ba452fd +size 8081 diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/adapter_config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..62ad72582968a5e609a9f8574f29ac9632caaf76 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "down_proj", + "gate_proj", + "v_proj", + "o_proj", + "k_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/adapter_model.safetensors b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..808808036454bd2d5d54f8aa0f14c899f347590d --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b4669bc0820631798715a485746d112301c03a1d9873afa7ff69bd35d25ee5 +size 335605144 diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/all_results.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..41a4da2037aa0374061bca5b5bff99df41cf906b --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 4.044527795919913e+17, + "train_loss": 2.9361699932780345, + "train_runtime": 285.2365, + "train_samples": 2851, + "train_samples_per_second": 9.995, + "train_steps_per_second": 0.628 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/chat_template.jinja b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/generation_config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..86546ab87e4eb79c96f00ffe17ad89ca00e2ecc1 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.52.4" +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/special_tokens_map.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer_config.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/train_results.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..41a4da2037aa0374061bca5b5bff99df41cf906b --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 4.044527795919913e+17, + "train_loss": 2.9361699932780345, + "train_runtime": 285.2365, + "train_samples": 2851, + "train_samples_per_second": 9.995, + "train_steps_per_second": 0.628 +} \ No newline at end of file diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/trainer_state.json b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..612be30f9ea796133931a6893e909e127d66b191 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/trainer_state.json @@ -0,0 +1,288 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 179, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.028050490883590462, + "grad_norm": 1520.0611572265625, + "learning_rate": 6.666666666666667e-06, + "loss": 48.421, + "step": 5 + }, + { + "epoch": 0.056100981767180924, + "grad_norm": 74.57456970214844, + "learning_rate": 1.5e-05, + "loss": 10.4235, + "step": 10 + }, + { + "epoch": 0.08415147265077139, + "grad_norm": 18.727670669555664, + "learning_rate": 2.3333333333333336e-05, + "loss": 2.7814, + "step": 15 + }, + { + "epoch": 0.11220196353436185, + "grad_norm": 22.32657241821289, + "learning_rate": 2.9997144412471088e-05, + "loss": 2.4948, + "step": 20 + }, + { + "epoch": 0.1402524544179523, + "grad_norm": 26.8909969329834, + "learning_rate": 2.9897312963925566e-05, + "loss": 2.2046, + "step": 25 + }, + { + "epoch": 0.16830294530154277, + "grad_norm": 33.73020935058594, + "learning_rate": 2.9655787530315307e-05, + "loss": 2.2691, + "step": 30 + }, + { + "epoch": 0.19635343618513323, + "grad_norm": 17.631208419799805, + "learning_rate": 2.927486535150267e-05, + "loss": 2.018, + "step": 35 + }, + { + "epoch": 0.2244039270687237, + "grad_norm": 26.445283889770508, + "learning_rate": 2.8758169522581795e-05, + "loss": 2.2511, + "step": 40 + }, + { + "epoch": 0.25245441795231416, + "grad_norm": 20.476442337036133, + "learning_rate": 2.8110614533249145e-05, + "loss": 2.0709, + "step": 45 + }, + { + "epoch": 0.2805049088359046, + "grad_norm": 24.186227798461914, + "learning_rate": 2.7338359524230917e-05, + "loss": 1.9392, + "step": 50 + }, + { + "epoch": 0.3085553997194951, + "grad_norm": 23.62510871887207, + "learning_rate": 2.6448749705363208e-05, + "loss": 1.6194, + "step": 55 + }, + { + "epoch": 0.33660589060308554, + "grad_norm": 29.262161254882812, + "learning_rate": 2.54502464925197e-05, + "loss": 2.0669, + "step": 60 + }, + { + "epoch": 0.364656381486676, + "grad_norm": 27.881044387817383, + "learning_rate": 2.4352347027881003e-05, + "loss": 1.6653, + "step": 65 + }, + { + "epoch": 0.39270687237026647, + "grad_norm": 27.448272705078125, + "learning_rate": 2.3165493849018966e-05, + "loss": 1.5163, + "step": 70 + }, + { + "epoch": 0.42075736325385693, + "grad_norm": 26.590068817138672, + "learning_rate": 2.1900975565967284e-05, + "loss": 1.6161, + "step": 75 + }, + { + "epoch": 0.4488078541374474, + "grad_norm": 35.092567443847656, + "learning_rate": 2.0570819490976602e-05, + "loss": 1.3044, + "step": 80 + }, + { + "epoch": 0.47685834502103785, + "grad_norm": 32.491294860839844, + "learning_rate": 1.918767724219309e-05, + "loss": 1.4359, + "step": 85 + }, + { + "epoch": 0.5049088359046283, + "grad_norm": 29.289979934692383, + "learning_rate": 1.776470440932719e-05, + "loss": 1.1781, + "step": 90 + }, + { + "epoch": 0.5329593267882188, + "grad_norm": 24.914491653442383, + "learning_rate": 1.6315435425858007e-05, + "loss": 1.0533, + "step": 95 + }, + { + "epoch": 0.5610098176718092, + "grad_norm": 40.441261291503906, + "learning_rate": 1.485365483791113e-05, + "loss": 0.9134, + "step": 100 + }, + { + "epoch": 0.5890603085553997, + "grad_norm": 30.625120162963867, + "learning_rate": 1.339326619422034e-05, + "loss": 1.0151, + "step": 105 + }, + { + "epoch": 0.6171107994389902, + "grad_norm": 26.532800674438477, + "learning_rate": 1.1948159804210497e-05, + "loss": 0.9814, + "step": 110 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 29.608884811401367, + "learning_rate": 1.0532080622004453e-05, + "loss": 1.0072, + "step": 115 + }, + { + "epoch": 0.6732117812061711, + "grad_norm": 36.67369079589844, + "learning_rate": 9.158497512959335e-06, + "loss": 0.8997, + "step": 120 + }, + { + "epoch": 0.7012622720897616, + "grad_norm": 18.671070098876953, + "learning_rate": 7.840475146187582e-06, + "loss": 0.8339, + "step": 125 + }, + { + "epoch": 0.729312762973352, + "grad_norm": 26.986202239990234, + "learning_rate": 6.590549731541573e-06, + "loss": 0.8898, + "step": 130 + }, + { + "epoch": 0.7573632538569425, + "grad_norm": 14.721198081970215, + "learning_rate": 5.4206097829743856e-06, + "loss": 0.7543, + "step": 135 + }, + { + "epoch": 0.7854137447405329, + "grad_norm": 34.5550537109375, + "learning_rate": 4.341783042381501e-06, + "loss": 1.3716, + "step": 140 + }, + { + "epoch": 0.8134642356241234, + "grad_norm": 28.700698852539062, + "learning_rate": 3.3643306394337016e-06, + "loss": 0.733, + "step": 145 + }, + { + "epoch": 0.8415147265077139, + "grad_norm": 23.695711135864258, + "learning_rate": 2.4975494940869687e-06, + "loss": 0.7755, + "step": 150 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 20.248140335083008, + "learning_rate": 1.7496838900561229e-06, + "loss": 0.6773, + "step": 155 + }, + { + "epoch": 0.8976157082748948, + "grad_norm": 39.453590393066406, + "learning_rate": 1.127847060310403e-06, + "loss": 0.6917, + "step": 160 + }, + { + "epoch": 0.9256661991584852, + "grad_norm": 32.13628387451172, + "learning_rate": 6.3795353042096e-07, + "loss": 0.79, + "step": 165 + }, + { + "epoch": 0.9537166900420757, + "grad_norm": 32.2395133972168, + "learning_rate": 2.8466286326840676e-07, + "loss": 1.0822, + "step": 170 + }, + { + "epoch": 0.9817671809256662, + "grad_norm": 22.426660537719727, + "learning_rate": 7.133534017569998e-08, + "loss": 0.9237, + "step": 175 + }, + { + "epoch": 1.0, + "step": 179, + "total_flos": 4.044527795919913e+17, + "train_loss": 2.9361699932780345, + "train_runtime": 285.2365, + "train_samples_per_second": 9.995, + "train_steps_per_second": 0.628 + } + ], + "logging_steps": 5, + "max_steps": 179, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.044527795919913e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/training_args.bin b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..932b7741b335130470f943c1d79f49b7ff913870 --- /dev/null +++ b/barexam_qa_skill_paraphrase_4_instruct_3epoch_no_reasoning/64_e1_3.0e-05/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba8c6f0f3a527ad8f427e931acfbd621b2c9a38b016d6bc6edb3118cd703881 +size 8081 diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/README.md b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ca016263e3cd730b4ae13c7ab02c60ddedecf98a --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/test/processed/knowledge_117 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/test/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/adapter_config.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5dc9ee5f48b867fe718fdfb0702f62fd849a0d2a --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "v_proj", + "down_proj", + "q_proj", + "k_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db1509942b60dc83877fb270e7f9ed630d6a4ce0 --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1af2be9bcfefa9b51c896aee4ee717a9edc7ab01345073aef8531c8de83c56e +size 671150064 diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/all_results.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbcde7298af6c00c0b65085fd4007508c34e3b9f --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.60333577156821e+17, + "train_loss": 0.31228866626317253, + "train_runtime": 401.4994, + "train_samples": 6318, + "train_samples_per_second": 47.208, + "train_steps_per_second": 1.479 +} \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/config.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/special_tokens_map.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/tokenizer.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/tokenizer_config.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/train_results.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbcde7298af6c00c0b65085fd4007508c34e3b9f --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.60333577156821e+17, + "train_loss": 0.31228866626317253, + "train_runtime": 401.4994, + "train_samples": 6318, + "train_samples_per_second": 47.208, + "train_steps_per_second": 1.479 +} \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/trainer_state.json b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7472cc3a3bbbdc8a42b53976c7a77efed27732c5 --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.4280351400375366, + "learning_rate": 4e-06, + "loss": 1.8274, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.849953532218933, + "learning_rate": 9e-06, + "loss": 1.8099, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.2468836307525635, + "learning_rate": 1.4e-05, + "loss": 1.564, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.2246865034103394, + "learning_rate": 1.9e-05, + "loss": 1.5734, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.4143412113189697, + "learning_rate": 2.4e-05, + "loss": 1.4939, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.2752662897109985, + "learning_rate": 2.9e-05, + "loss": 1.4479, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.5475965738296509, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.244, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.3672237396240234, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.2106, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.866748571395874, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1104, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.6675803661346436, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0126, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.9607398509979248, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.9993, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.8535140752792358, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8729, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.857739806175232, + "learning_rate": 2.973179790633317e-05, + "loss": 0.7807, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.9827221632003784, + "learning_rate": 2.964744782530777e-05, + "loss": 0.7337, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 2.376154661178589, + "learning_rate": 2.955173677376284e-05, + "loss": 0.7279, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.7229505777359009, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6526, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.845930576324463, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5682, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.916355013847351, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5611, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.9546066522598267, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.5177, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.57351815700531, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5016, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 2.1229121685028076, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4285, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.5255433320999146, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4086, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 2.0740153789520264, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3675, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 2.061563014984131, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3887, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.7592610120773315, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3457, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.6467878818511963, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3381, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.6201211214065552, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3405, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.7711594104766846, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3096, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.5649070739746094, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.3074, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.6355904340744019, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3234, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.6582238674163818, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.282, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.5159419775009155, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2752, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.7345476150512695, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2595, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.3734257221221924, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2752, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.226012945175171, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2507, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.2453619241714478, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2253, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.5106444358825684, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2341, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.5092755556106567, + "learning_rate": 2.449165826796448e-05, + "loss": 0.227, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.2947322130203247, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2395, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.2164002656936646, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1891, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.3368251323699951, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1705, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.4447877407073975, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1843, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.2546546459197998, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1677, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.2638579607009888, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1821, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.276183843612671, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1795, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.0791845321655273, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1649, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.30581796169281, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1628, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.0786525011062622, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1695, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.5041277408599854, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1716, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.1344386339187622, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1746, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 0.913921058177948, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1639, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 1.051873803138733, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1642, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 1.0783296823501587, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1684, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.0006561279296875, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1552, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 1.0802558660507202, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.149, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.7927950620651245, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1443, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.9342193603515625, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1512, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.7691694498062134, + "learning_rate": 1.69164662490578e-05, + "loss": 0.139, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.9960677623748779, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1426, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.2629998922348022, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1507, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.7555195093154907, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1378, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.6860382556915283, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1341, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.7692504525184631, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1404, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.6983894109725952, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1274, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.7932406067848206, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1306, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.6868723034858704, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1315, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.8811209797859192, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1377, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.7519631385803223, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1303, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.8700816035270691, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1304, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.9217109084129333, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1279, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.6051235795021057, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.121, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.9966759085655212, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.127, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.5951815843582153, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.128, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.6469151973724365, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1135, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.6306494474411011, + "learning_rate": 9.922075858704368e-06, + "loss": 0.117, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.7365113496780396, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1225, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.9501201510429382, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1269, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.665561854839325, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1324, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.5945193767547607, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1171, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5264803171157837, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1097, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.5449300408363342, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1177, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.5583174824714661, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1144, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.7140135169029236, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1099, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.5383283495903015, + "learning_rate": 6.579809488801994e-06, + "loss": 0.0997, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.5199109315872192, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1135, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.5060058832168579, + "learning_rate": 5.901757461493989e-06, + "loss": 0.1098, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.42820051312446594, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1149, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.6197940707206726, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1136, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.5017422437667847, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1066, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.39661988615989685, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1119, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.46620187163352966, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1102, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.6426394581794739, + "learning_rate": 4.044902040769963e-06, + "loss": 0.1146, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.40909916162490845, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.1031, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.5187073349952698, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1024, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.6129388809204102, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1168, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.4317379295825958, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.1039, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.4640291929244995, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1113, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.41769540309906006, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1076, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.4984108805656433, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1071, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.4344765841960907, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1095, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.4730183482170105, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1094, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.5213469862937927, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0975, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.473433256149292, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.1043, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.46372050046920776, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0967, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.5315404534339905, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.103, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.4551044702529907, + "learning_rate": 9.724785471955566e-07, + "loss": 0.103, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.4812636375427246, + "learning_rate": 8.299636258812199e-07, + "loss": 0.1019, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.46122288703918457, + "learning_rate": 6.984393814019885e-07, + "loss": 0.101, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.46437540650367737, + "learning_rate": 5.780078276432865e-07, + "loss": 0.0979, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.33700236678123474, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0934, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.5136511921882629, + "learning_rate": 3.707877563706158e-07, + "loss": 0.0998, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.4278213083744049, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0999, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.4172161817550659, + "learning_rate": 2.089461901495715e-07, + "loss": 0.1001, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.39768633246421814, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0907, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.43440350890159607, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1052, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.4117976129055023, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0976, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.4611513912677765, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1024, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.46728166937828064, + "learning_rate": 5.817209927129752e-09, + "loss": 0.099, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.60333577156821e+17, + "train_loss": 0.31228866626317253, + "train_runtime": 401.4994, + "train_samples_per_second": 47.208, + "train_steps_per_second": 1.479 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.60333577156821e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/training_args.bin b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c7fc644f8dd3dff4ceee1e55e3c3c8c4d4d0736 --- /dev/null +++ b/barexam_qa_test_knowledge_100_base/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508f30163575d5d2b5931c6c2365df8848b342a2fc9b8e47ab1847b54aaa104b +size 8273 diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/README.md b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e61d0105ece182b0339ee2835ed821eaf2f4df67 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/test/processed/knowledge_117 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/test/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/adapter_config.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d79ecf406898321e3430517f28708222aedbc490 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "gate_proj", + "up_proj", + "q_proj", + "o_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..424abad968e891c82b0428e309d8a9408462c774 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24baa42915f793cf891e2ec873128d9d01edaf36ef72c34ecb01e612baa15c47 +size 671150064 diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/all_results.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..39fa17c37837e061cbe45d45ee0e2196d72653d3 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.60333577156821e+17, + "train_loss": 0.310583806951038, + "train_runtime": 387.7622, + "train_samples": 6318, + "train_samples_per_second": 48.88, + "train_steps_per_second": 1.532 +} \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/chat_template.jinja b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/config.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/special_tokens_map.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/tokenizer_config.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/train_results.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..39fa17c37837e061cbe45d45ee0e2196d72653d3 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.60333577156821e+17, + "train_loss": 0.310583806951038, + "train_runtime": 387.7622, + "train_samples": 6318, + "train_samples_per_second": 48.88, + "train_steps_per_second": 1.532 +} \ No newline at end of file diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/trainer_state.json b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4d6eee6152cd2ec84be4f08e649bfafc49d25104 --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.974959135055542, + "learning_rate": 4e-06, + "loss": 1.9111, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 2.1687111854553223, + "learning_rate": 9e-06, + "loss": 1.8938, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.3972694873809814, + "learning_rate": 1.4e-05, + "loss": 1.6529, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.2484467029571533, + "learning_rate": 1.9e-05, + "loss": 1.6502, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.4021843671798706, + "learning_rate": 2.4e-05, + "loss": 1.537, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.2612180709838867, + "learning_rate": 2.9e-05, + "loss": 1.4877, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.3430818319320679, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2728, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.3731223344802856, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.2282, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.6660759449005127, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1203, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.6734846830368042, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0011, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 2.0348196029663086, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.991, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.9436084032058716, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8602, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.723400354385376, + "learning_rate": 2.973179790633317e-05, + "loss": 0.7678, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.8706308603286743, + "learning_rate": 2.964744782530777e-05, + "loss": 0.7198, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 2.333040714263916, + "learning_rate": 2.955173677376284e-05, + "loss": 0.7143, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.6307823657989502, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6287, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.731980323791504, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5511, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.8409576416015625, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5415, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.7212445735931396, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4991, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.676977515220642, + "learning_rate": 2.890567022607206e-05, + "loss": 0.4852, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.8831697702407837, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4123, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.4570202827453613, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.3909, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.640596866607666, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3515, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 2.1996631622314453, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3719, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.6384434700012207, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.333, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.5225738286972046, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3205, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.5588840246200562, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3242, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.665793776512146, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.2946, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.5262067317962646, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.295, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.5409502983093262, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3072, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.6780145168304443, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2685, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.4104974269866943, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2637, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.5888535976409912, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.248, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.4139901399612427, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2547, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.2538520097732544, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.234, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.4064397811889648, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2072, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.3119398355484009, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2275, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.3898496627807617, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2127, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.274878740310669, + "learning_rate": 2.416452950136248e-05, + "loss": 0.227, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.1675902605056763, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1729, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.1460307836532593, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1654, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.2071938514709473, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1778, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.0471787452697754, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1595, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.2782766819000244, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1758, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.2619632482528687, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1731, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.0919606685638428, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1535, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.2030103206634521, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1544, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.0232347249984741, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1625, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.026922345161438, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1666, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.012614130973816, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1677, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 0.9912634491920471, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1558, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.9957408905029297, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1579, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 1.0271358489990234, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1607, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 0.9509857296943665, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1523, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 0.8841871619224548, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1428, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.7403998374938965, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1398, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.81593257188797, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1445, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.6752234101295471, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1369, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.7800904512405396, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1353, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.2925987243652344, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1504, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.7807974219322205, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1336, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.7744953632354736, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1326, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.7171505689620972, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1406, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.6437786817550659, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1224, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.7635287642478943, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1255, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.6448877453804016, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1298, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.8078610897064209, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1355, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.7144020795822144, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1295, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.8078051209449768, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1276, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.6044667363166809, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1235, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.7810867428779602, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1184, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.7266767024993896, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1253, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.528647243976593, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1261, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.7053439617156982, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1114, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.6284416913986206, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1162, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.6624728441238403, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1201, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.5484668612480164, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1222, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.49242109060287476, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1242, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.5310856699943542, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1133, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.597822368144989, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1075, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.47414398193359375, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1159, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.556117832660675, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1137, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.5261185765266418, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1082, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.42120039463043213, + "learning_rate": 6.579809488801994e-06, + "loss": 0.0985, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.4571400582790375, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1126, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.5031740069389343, + "learning_rate": 5.901757461493989e-06, + "loss": 0.108, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.40054717659950256, + "learning_rate": 5.573186564064649e-06, + "loss": 0.114, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.579735517501831, + "learning_rate": 5.25192736699541e-06, + "loss": 0.113, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.3922980725765228, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1055, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.364778608083725, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1106, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.40811723470687866, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1094, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.5623596906661987, + "learning_rate": 4.044902040769963e-06, + "loss": 0.1129, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.3576464354991913, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.1036, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.46962887048721313, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1026, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.6268278360366821, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1153, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.5886241793632507, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.1039, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.3824120759963989, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1102, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.3859897553920746, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1083, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.4059358835220337, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1053, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.37626129388809204, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1084, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.4535799026489258, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1091, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.4682500660419464, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0971, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.4456654489040375, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.1042, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.40476614236831665, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0963, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.4900675117969513, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.1019, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.41516566276550293, + "learning_rate": 9.724785471955566e-07, + "loss": 0.1024, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.40812745690345764, + "learning_rate": 8.299636258812199e-07, + "loss": 0.1009, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.4146260917186737, + "learning_rate": 6.984393814019885e-07, + "loss": 0.1003, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.43576693534851074, + "learning_rate": 5.780078276432865e-07, + "loss": 0.0966, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.32585325837135315, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0932, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.5520027279853821, + "learning_rate": 3.707877563706158e-07, + "loss": 0.0997, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.3795071542263031, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0999, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.4021361172199249, + "learning_rate": 2.089461901495715e-07, + "loss": 0.0996, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.3569563925266266, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0905, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.4080843925476074, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1049, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.4093749225139618, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0969, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.41959619522094727, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1019, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.4787942171096802, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0988, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.60333577156821e+17, + "train_loss": 0.310583806951038, + "train_runtime": 387.7622, + "train_samples_per_second": 48.88, + "train_steps_per_second": 1.532 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.60333577156821e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/training_args.bin b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b8d3ab88b6fc65435df3193300ef3ed8354039f --- /dev/null +++ b/barexam_qa_test_knowledge_100_instruct/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459785237dbaaa617270d4ea4ff5ce67dabfcd2da3f907604e70553cc1ebaad0 +size 8337 diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ec20a57ce837f0aa9d96e0dd826a403f0d99cf4 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a4721bfa6b754904a1d4c9df9ca9d77f3b54278 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "q_proj", + "k_proj", + "down_proj", + "gate_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..243d6e0603dc13c3c1ab3fc05626c8553f7c5d6d --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11976535d4a5ecbd4a9fea31a46c874a6b7a99682224c831d5c7f00abc5a784c +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..525a9708b1880fa6ca76e6a411287bf3e1abbbd4 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.907839192876974e+17, + "train_loss": 0.31086180552008186, + "train_runtime": 395.4411, + "train_samples": 6285, + "train_samples_per_second": 47.681, + "train_steps_per_second": 1.495 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..525a9708b1880fa6ca76e6a411287bf3e1abbbd4 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.907839192876974e+17, + "train_loss": 0.31086180552008186, + "train_runtime": 395.4411, + "train_samples": 6285, + "train_samples_per_second": 47.681, + "train_steps_per_second": 1.495 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..78b5d387190ea2a360e64d609451ec57f612b46f --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 591, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02544529262086514, + "grad_norm": 1.750571608543396, + "learning_rate": 4e-06, + "loss": 1.7637, + "step": 5 + }, + { + "epoch": 0.05089058524173028, + "grad_norm": 1.7784054279327393, + "learning_rate": 9e-06, + "loss": 1.6717, + "step": 10 + }, + { + "epoch": 0.07633587786259542, + "grad_norm": 1.3094106912612915, + "learning_rate": 1.4e-05, + "loss": 1.594, + "step": 15 + }, + { + "epoch": 0.10178117048346055, + "grad_norm": 1.2621792554855347, + "learning_rate": 1.9e-05, + "loss": 1.5076, + "step": 20 + }, + { + "epoch": 0.1272264631043257, + "grad_norm": 1.1610896587371826, + "learning_rate": 2.4e-05, + "loss": 1.3977, + "step": 25 + }, + { + "epoch": 0.15267175572519084, + "grad_norm": 1.94066321849823, + "learning_rate": 2.9e-05, + "loss": 1.3493, + "step": 30 + }, + { + "epoch": 0.178117048346056, + "grad_norm": 1.6791846752166748, + "learning_rate": 2.9996236974947768e-05, + "loss": 1.2064, + "step": 35 + }, + { + "epoch": 0.2035623409669211, + "grad_norm": 1.6765260696411133, + "learning_rate": 2.998095292147853e-05, + "loss": 1.2424, + "step": 40 + }, + { + "epoch": 0.22900763358778625, + "grad_norm": 1.6117877960205078, + "learning_rate": 2.9953924623165958e-05, + "loss": 1.1219, + "step": 45 + }, + { + "epoch": 0.2544529262086514, + "grad_norm": 1.3426357507705688, + "learning_rate": 2.9915173268712462e-05, + "loss": 1.1071, + "step": 50 + }, + { + "epoch": 0.27989821882951654, + "grad_norm": 1.5179742574691772, + "learning_rate": 2.9864729237053014e-05, + "loss": 1.0051, + "step": 55 + }, + { + "epoch": 0.3053435114503817, + "grad_norm": 1.5949455499649048, + "learning_rate": 2.9802632073539752e-05, + "loss": 0.9056, + "step": 60 + }, + { + "epoch": 0.33078880407124683, + "grad_norm": 1.647726058959961, + "learning_rate": 2.97289304589406e-05, + "loss": 0.8795, + "step": 65 + }, + { + "epoch": 0.356234096692112, + "grad_norm": 1.857741117477417, + "learning_rate": 2.9643682171276208e-05, + "loss": 0.8162, + "step": 70 + }, + { + "epoch": 0.3816793893129771, + "grad_norm": 1.546259880065918, + "learning_rate": 2.9546954040525144e-05, + "loss": 0.7579, + "step": 75 + }, + { + "epoch": 0.4071246819338422, + "grad_norm": 1.682631015777588, + "learning_rate": 2.9438821896232884e-05, + "loss": 0.7188, + "step": 80 + }, + { + "epoch": 0.43256997455470736, + "grad_norm": 1.8948148488998413, + "learning_rate": 2.93193705080656e-05, + "loss": 0.6549, + "step": 85 + }, + { + "epoch": 0.4580152671755725, + "grad_norm": 2.033395290374756, + "learning_rate": 2.9188693519355373e-05, + "loss": 0.6004, + "step": 90 + }, + { + "epoch": 0.48346055979643765, + "grad_norm": 1.6254369020462036, + "learning_rate": 2.9046893373689004e-05, + "loss": 0.5619, + "step": 95 + }, + { + "epoch": 0.5089058524173028, + "grad_norm": 2.2799019813537598, + "learning_rate": 2.8894081234597826e-05, + "loss": 0.5236, + "step": 100 + }, + { + "epoch": 0.5343511450381679, + "grad_norm": 1.793721318244934, + "learning_rate": 2.873037689841161e-05, + "loss": 0.4836, + "step": 105 + }, + { + "epoch": 0.5597964376590331, + "grad_norm": 2.06807279586792, + "learning_rate": 2.8555908700344826e-05, + "loss": 0.4255, + "step": 110 + }, + { + "epoch": 0.5852417302798982, + "grad_norm": 1.8842345476150513, + "learning_rate": 2.837081341388887e-05, + "loss": 0.399, + "step": 115 + }, + { + "epoch": 0.6106870229007634, + "grad_norm": 1.7868107557296753, + "learning_rate": 2.8175236143589143e-05, + "loss": 0.4273, + "step": 120 + }, + { + "epoch": 0.6361323155216285, + "grad_norm": 1.6490942239761353, + "learning_rate": 2.7969330211291083e-05, + "loss": 0.3685, + "step": 125 + }, + { + "epoch": 0.6615776081424937, + "grad_norm": 1.8219431638717651, + "learning_rate": 2.7753257035944216e-05, + "loss": 0.3953, + "step": 130 + }, + { + "epoch": 0.6870229007633588, + "grad_norm": 1.7342441082000732, + "learning_rate": 2.7527186007058584e-05, + "loss": 0.3485, + "step": 135 + }, + { + "epoch": 0.712468193384224, + "grad_norm": 1.3871192932128906, + "learning_rate": 2.729129435191267e-05, + "loss": 0.3272, + "step": 140 + }, + { + "epoch": 0.7379134860050891, + "grad_norm": 1.6602753400802612, + "learning_rate": 2.7045766996616914e-05, + "loss": 0.3232, + "step": 145 + }, + { + "epoch": 0.7633587786259542, + "grad_norm": 1.720905065536499, + "learning_rate": 2.6790796421141813e-05, + "loss": 0.3018, + "step": 150 + }, + { + "epoch": 0.7888040712468194, + "grad_norm": 1.6346253156661987, + "learning_rate": 2.652658250842418e-05, + "loss": 0.2518, + "step": 155 + }, + { + "epoch": 0.8142493638676844, + "grad_norm": 1.5205072164535522, + "learning_rate": 2.6253332387669896e-05, + "loss": 0.2721, + "step": 160 + }, + { + "epoch": 0.8396946564885496, + "grad_norm": 1.5182818174362183, + "learning_rate": 2.597126027197598e-05, + "loss": 0.2741, + "step": 165 + }, + { + "epoch": 0.8651399491094147, + "grad_norm": 1.4023336172103882, + "learning_rate": 2.5680587290399283e-05, + "loss": 0.2388, + "step": 170 + }, + { + "epoch": 0.8905852417302799, + "grad_norm": 1.4704838991165161, + "learning_rate": 2.5381541314603425e-05, + "loss": 0.246, + "step": 175 + }, + { + "epoch": 0.916030534351145, + "grad_norm": 1.4038188457489014, + "learning_rate": 2.5074356780219952e-05, + "loss": 0.2226, + "step": 180 + }, + { + "epoch": 0.9414758269720102, + "grad_norm": 1.269085168838501, + "learning_rate": 2.4759274503063632e-05, + "loss": 0.2309, + "step": 185 + }, + { + "epoch": 0.9669211195928753, + "grad_norm": 1.4865037202835083, + "learning_rate": 2.44365414903461e-05, + "loss": 0.2356, + "step": 190 + }, + { + "epoch": 0.9923664122137404, + "grad_norm": 1.2510932683944702, + "learning_rate": 2.410641074703575e-05, + "loss": 0.2516, + "step": 195 + }, + { + "epoch": 1.015267175572519, + "grad_norm": 1.2310494184494019, + "learning_rate": 2.3769141077515717e-05, + "loss": 0.1917, + "step": 200 + }, + { + "epoch": 1.0407124681933841, + "grad_norm": 1.277191400527954, + "learning_rate": 2.342499688269547e-05, + "loss": 0.1838, + "step": 205 + }, + { + "epoch": 1.0661577608142494, + "grad_norm": 1.3444560766220093, + "learning_rate": 2.3074247952734994e-05, + "loss": 0.1877, + "step": 210 + }, + { + "epoch": 1.0916030534351144, + "grad_norm": 1.25050950050354, + "learning_rate": 2.271716925554411e-05, + "loss": 0.1696, + "step": 215 + }, + { + "epoch": 1.1170483460559797, + "grad_norm": 1.0814458131790161, + "learning_rate": 2.2354040721222733e-05, + "loss": 0.1577, + "step": 220 + }, + { + "epoch": 1.1424936386768447, + "grad_norm": 0.9774757027626038, + "learning_rate": 2.198514702261104e-05, + "loss": 0.1734, + "step": 225 + }, + { + "epoch": 1.16793893129771, + "grad_norm": 1.142044186592102, + "learning_rate": 2.1610777352121578e-05, + "loss": 0.1694, + "step": 230 + }, + { + "epoch": 1.193384223918575, + "grad_norm": 1.052420973777771, + "learning_rate": 2.12312251950283e-05, + "loss": 0.1576, + "step": 235 + }, + { + "epoch": 1.2188295165394403, + "grad_norm": 1.1398721933364868, + "learning_rate": 2.084678809939019e-05, + "loss": 0.1601, + "step": 240 + }, + { + "epoch": 1.2442748091603053, + "grad_norm": 0.9079051613807678, + "learning_rate": 2.0457767442789962e-05, + "loss": 0.1651, + "step": 245 + }, + { + "epoch": 1.2697201017811706, + "grad_norm": 1.2689692974090576, + "learning_rate": 2.0064468196070533e-05, + "loss": 0.1546, + "step": 250 + }, + { + "epoch": 1.2951653944020356, + "grad_norm": 1.2862428426742554, + "learning_rate": 1.9667198684254643e-05, + "loss": 0.1625, + "step": 255 + }, + { + "epoch": 1.3206106870229006, + "grad_norm": 1.341110110282898, + "learning_rate": 1.9266270344834946e-05, + "loss": 0.1721, + "step": 260 + }, + { + "epoch": 1.3460559796437659, + "grad_norm": 0.9074805974960327, + "learning_rate": 1.8861997483624136e-05, + "loss": 0.1503, + "step": 265 + }, + { + "epoch": 1.3715012722646311, + "grad_norm": 1.2487558126449585, + "learning_rate": 1.8454697028356413e-05, + "loss": 0.1558, + "step": 270 + }, + { + "epoch": 1.3969465648854962, + "grad_norm": 0.8811066150665283, + "learning_rate": 1.8044688280233543e-05, + "loss": 0.1364, + "step": 275 + }, + { + "epoch": 1.4223918575063612, + "grad_norm": 0.8572193384170532, + "learning_rate": 1.7632292663610245e-05, + "loss": 0.1393, + "step": 280 + }, + { + "epoch": 1.4478371501272265, + "grad_norm": 0.818264901638031, + "learning_rate": 1.721783347401513e-05, + "loss": 0.1439, + "step": 285 + }, + { + "epoch": 1.4732824427480917, + "grad_norm": 1.0029388666152954, + "learning_rate": 1.6801635624704777e-05, + "loss": 0.1392, + "step": 290 + }, + { + "epoch": 1.4987277353689568, + "grad_norm": 1.203516960144043, + "learning_rate": 1.638402539194953e-05, + "loss": 0.1517, + "step": 295 + }, + { + "epoch": 1.5241730279898218, + "grad_norm": 1.0503861904144287, + "learning_rate": 1.5965330159250847e-05, + "loss": 0.1392, + "step": 300 + }, + { + "epoch": 1.549618320610687, + "grad_norm": 1.0111517906188965, + "learning_rate": 1.5545878160690586e-05, + "loss": 0.1304, + "step": 305 + }, + { + "epoch": 1.5750636132315523, + "grad_norm": 1.0208218097686768, + "learning_rate": 1.5125998223613501e-05, + "loss": 0.132, + "step": 310 + }, + { + "epoch": 1.6005089058524173, + "grad_norm": 1.003890037536621, + "learning_rate": 1.4706019510844666e-05, + "loss": 0.1234, + "step": 315 + }, + { + "epoch": 1.6259541984732824, + "grad_norm": 0.9037622213363647, + "learning_rate": 1.4286271262643866e-05, + "loss": 0.1171, + "step": 320 + }, + { + "epoch": 1.6513994910941476, + "grad_norm": 0.6694446802139282, + "learning_rate": 1.3867082538599317e-05, + "loss": 0.1143, + "step": 325 + }, + { + "epoch": 1.6768447837150129, + "grad_norm": 1.1683619022369385, + "learning_rate": 1.3448781959663005e-05, + "loss": 0.123, + "step": 330 + }, + { + "epoch": 1.7022900763358777, + "grad_norm": 0.8081491589546204, + "learning_rate": 1.3031697450529904e-05, + "loss": 0.1297, + "step": 335 + }, + { + "epoch": 1.727735368956743, + "grad_norm": 0.8821333050727844, + "learning_rate": 1.2616155982563004e-05, + "loss": 0.1193, + "step": 340 + }, + { + "epoch": 1.7531806615776082, + "grad_norm": 0.9094138145446777, + "learning_rate": 1.2202483317465706e-05, + "loss": 0.1252, + "step": 345 + }, + { + "epoch": 1.7786259541984732, + "grad_norm": 1.0687609910964966, + "learning_rate": 1.1791003751902542e-05, + "loss": 0.1214, + "step": 350 + }, + { + "epoch": 1.8040712468193383, + "grad_norm": 0.8566547632217407, + "learning_rate": 1.1382039863268376e-05, + "loss": 0.1163, + "step": 355 + }, + { + "epoch": 1.8295165394402035, + "grad_norm": 0.7271131873130798, + "learning_rate": 1.0975912256805437e-05, + "loss": 0.1148, + "step": 360 + }, + { + "epoch": 1.8549618320610688, + "grad_norm": 0.7480084300041199, + "learning_rate": 1.0572939314266403e-05, + "loss": 0.1135, + "step": 365 + }, + { + "epoch": 1.8804071246819338, + "grad_norm": 1.0015650987625122, + "learning_rate": 1.0173436944320583e-05, + "loss": 0.1187, + "step": 370 + }, + { + "epoch": 1.9058524173027989, + "grad_norm": 0.7261539101600647, + "learning_rate": 9.777718334898859e-06, + "loss": 0.1113, + "step": 375 + }, + { + "epoch": 1.9312977099236641, + "grad_norm": 0.8191275596618652, + "learning_rate": 9.386093707671545e-06, + "loss": 0.1172, + "step": 380 + }, + { + "epoch": 1.9567430025445294, + "grad_norm": 0.5591948628425598, + "learning_rate": 8.998870074851604e-06, + "loss": 0.1089, + "step": 385 + }, + { + "epoch": 1.9821882951653944, + "grad_norm": 0.6376784443855286, + "learning_rate": 8.61635099851395e-06, + "loss": 0.1049, + "step": 390 + }, + { + "epoch": 2.005089058524173, + "grad_norm": 0.4394124448299408, + "learning_rate": 8.238836352619426e-06, + "loss": 0.1126, + "step": 395 + }, + { + "epoch": 2.030534351145038, + "grad_norm": 0.6266621947288513, + "learning_rate": 7.866622087930076e-06, + "loss": 0.1022, + "step": 400 + }, + { + "epoch": 2.0559796437659035, + "grad_norm": 0.4601390063762665, + "learning_rate": 7.500000000000004e-06, + "loss": 0.102, + "step": 405 + }, + { + "epoch": 2.0814249363867683, + "grad_norm": 0.7355275750160217, + "learning_rate": 7.1392575004236655e-06, + "loss": 0.1041, + "step": 410 + }, + { + "epoch": 2.1068702290076335, + "grad_norm": 0.5902755856513977, + "learning_rate": 6.7846773915209535e-06, + "loss": 0.1034, + "step": 415 + }, + { + "epoch": 2.132315521628499, + "grad_norm": 0.8332865834236145, + "learning_rate": 6.436537644635706e-06, + "loss": 0.1009, + "step": 420 + }, + { + "epoch": 2.157760814249364, + "grad_norm": 0.40669429302215576, + "learning_rate": 6.0951111822214225e-06, + "loss": 0.1033, + "step": 425 + }, + { + "epoch": 2.183206106870229, + "grad_norm": 0.46925076842308044, + "learning_rate": 5.760665663885047e-06, + "loss": 0.1059, + "step": 430 + }, + { + "epoch": 2.208651399491094, + "grad_norm": 0.4888313412666321, + "learning_rate": 5.43346327655652e-06, + "loss": 0.1013, + "step": 435 + }, + { + "epoch": 2.2340966921119594, + "grad_norm": 0.6651352643966675, + "learning_rate": 5.113760528948623e-06, + "loss": 0.0958, + "step": 440 + }, + { + "epoch": 2.2595419847328246, + "grad_norm": 0.5465208292007446, + "learning_rate": 4.80180805046822e-06, + "loss": 0.0966, + "step": 445 + }, + { + "epoch": 2.2849872773536894, + "grad_norm": 0.3938451111316681, + "learning_rate": 4.497850394736564e-06, + "loss": 0.0984, + "step": 450 + }, + { + "epoch": 2.3104325699745547, + "grad_norm": 0.6080864667892456, + "learning_rate": 4.202125847872678e-06, + "loss": 0.0992, + "step": 455 + }, + { + "epoch": 2.33587786259542, + "grad_norm": 0.7755571603775024, + "learning_rate": 3.914866241690115e-06, + "loss": 0.1094, + "step": 460 + }, + { + "epoch": 2.3613231552162848, + "grad_norm": 0.6593393683433533, + "learning_rate": 3.6362967719535444e-06, + "loss": 0.0989, + "step": 465 + }, + { + "epoch": 2.38676844783715, + "grad_norm": 1.8429594039916992, + "learning_rate": 3.3666358218376274e-06, + "loss": 0.09, + "step": 470 + }, + { + "epoch": 2.4122137404580153, + "grad_norm": 0.638966977596283, + "learning_rate": 3.106094790726594e-06, + "loss": 0.094, + "step": 475 + }, + { + "epoch": 2.4376590330788805, + "grad_norm": 0.5463896989822388, + "learning_rate": 2.8548779284887443e-06, + "loss": 0.0992, + "step": 480 + }, + { + "epoch": 2.4631043256997454, + "grad_norm": 0.469995379447937, + "learning_rate": 2.6131821753557395e-06, + "loss": 0.0962, + "step": 485 + }, + { + "epoch": 2.4885496183206106, + "grad_norm": 0.4856208562850952, + "learning_rate": 2.38119700753228e-06, + "loss": 0.0942, + "step": 490 + }, + { + "epoch": 2.513994910941476, + "grad_norm": 0.4015108346939087, + "learning_rate": 2.159104288657164e-06, + "loss": 0.0906, + "step": 495 + }, + { + "epoch": 2.539440203562341, + "grad_norm": 0.5012176632881165, + "learning_rate": 1.947078127232169e-06, + "loss": 0.0977, + "step": 500 + }, + { + "epoch": 2.564885496183206, + "grad_norm": 0.592395007610321, + "learning_rate": 1.74528474013055e-06, + "loss": 0.0901, + "step": 505 + }, + { + "epoch": 2.590330788804071, + "grad_norm": 0.5032354593276978, + "learning_rate": 1.5538823222921288e-06, + "loss": 0.0966, + "step": 510 + }, + { + "epoch": 2.6157760814249365, + "grad_norm": 0.5990813970565796, + "learning_rate": 1.3730209227071439e-06, + "loss": 0.0933, + "step": 515 + }, + { + "epoch": 2.6412213740458013, + "grad_norm": 0.4217401444911957, + "learning_rate": 1.2028423267860806e-06, + "loss": 0.0895, + "step": 520 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.538853645324707, + "learning_rate": 1.0434799452076915e-06, + "loss": 0.0847, + "step": 525 + }, + { + "epoch": 2.6921119592875318, + "grad_norm": 0.5196229815483093, + "learning_rate": 8.950587093323437e-07, + "loss": 0.0835, + "step": 530 + }, + { + "epoch": 2.717557251908397, + "grad_norm": 0.4257654845714569, + "learning_rate": 7.576949732626881e-07, + "loss": 0.0918, + "step": 535 + }, + { + "epoch": 2.7430025445292623, + "grad_norm": 0.4480805993080139, + "learning_rate": 6.314964226284381e-07, + "loss": 0.0868, + "step": 540 + }, + { + "epoch": 2.768447837150127, + "grad_norm": 0.4174075722694397, + "learning_rate": 5.165619901667313e-07, + "loss": 0.0932, + "step": 545 + }, + { + "epoch": 2.7938931297709924, + "grad_norm": 0.4397164285182953, + "learning_rate": 4.1298177816430917e-07, + "loss": 0.09, + "step": 550 + }, + { + "epoch": 2.8193384223918576, + "grad_norm": 0.4499697685241699, + "learning_rate": 3.2083698782226e-07, + "loss": 0.0886, + "step": 555 + }, + { + "epoch": 2.8447837150127224, + "grad_norm": 0.48477110266685486, + "learning_rate": 2.401998555987389e-07, + "loss": 0.0897, + "step": 560 + }, + { + "epoch": 2.8702290076335877, + "grad_norm": 0.4162387549877167, + "learning_rate": 1.7113359657954354e-07, + "loss": 0.0846, + "step": 565 + }, + { + "epoch": 2.895674300254453, + "grad_norm": 0.5290712118148804, + "learning_rate": 1.1369235492096397e-07, + "loss": 0.0841, + "step": 570 + }, + { + "epoch": 2.921119592875318, + "grad_norm": 0.4216464161872864, + "learning_rate": 6.792116140373117e-08, + "loss": 0.0922, + "step": 575 + }, + { + "epoch": 2.9465648854961835, + "grad_norm": 0.44119587540626526, + "learning_rate": 3.385589813135692e-08, + "loss": 0.095, + "step": 580 + }, + { + "epoch": 2.9720101781170483, + "grad_norm": 0.685880184173584, + "learning_rate": 1.1523270400535246e-08, + "loss": 0.0866, + "step": 585 + }, + { + "epoch": 2.9974554707379135, + "grad_norm": 0.43796560168266296, + "learning_rate": 9.407857656540398e-10, + "loss": 0.0903, + "step": 590 + }, + { + "epoch": 3.0, + "step": 591, + "total_flos": 1.907839192876974e+17, + "train_loss": 0.31086180552008186, + "train_runtime": 395.4411, + "train_samples_per_second": 47.681, + "train_steps_per_second": 1.495 + } + ], + "logging_steps": 5, + "max_steps": 591, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.907839192876974e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffa6933d321f1ce8388c0f80d16378abc39309b1 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f79714a4290b8dd88851647b60cf3acecda2cba3a91179b6d7a604e3f3919c8 +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9d854d902597a2bef11a00f353d4da1c144f7e77 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 1_128_e3_3e-5 + results: [] +--- + + + +# 1_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6049e955bb4efe4d9e2e172a4b440241557fdee3 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "k_proj", + "q_proj", + "gate_proj", + "o_proj", + "v_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ac5eff92d59a7f20c64416bc4e872c4435c84cf --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72807a2b85ef3fa95aed1be10fe5b7057c87b86079805bbdcb95c866aec95789 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..824e53e34cf2f0b3ec0248a5710ab51324540973 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5021974776866406e+17, + "train_loss": 0.3117149337273254, + "train_runtime": 395.5776, + "train_samples": 6318, + "train_samples_per_second": 47.915, + "train_steps_per_second": 1.502 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..824e53e34cf2f0b3ec0248a5710ab51324540973 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5021974776866406e+17, + "train_loss": 0.3117149337273254, + "train_runtime": 395.5776, + "train_samples": 6318, + "train_samples_per_second": 47.915, + "train_steps_per_second": 1.502 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ab4874328f9db0c3bff613dd10ba501b7bf8f12e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.3796606063842773, + "learning_rate": 4e-06, + "loss": 1.8768, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.3583266735076904, + "learning_rate": 9e-06, + "loss": 1.7229, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.651982307434082, + "learning_rate": 1.4e-05, + "loss": 1.635, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.5616867542266846, + "learning_rate": 1.9e-05, + "loss": 1.5439, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.3711315393447876, + "learning_rate": 2.4e-05, + "loss": 1.5214, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.4612257480621338, + "learning_rate": 2.9e-05, + "loss": 1.3537, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.4473471641540527, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.241, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.869160771369934, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1164, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.5135219097137451, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.058, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.4957516193389893, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0184, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.637587547302246, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.8892, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.6584653854370117, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8395, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 2.011315107345581, + "learning_rate": 2.973179790633317e-05, + "loss": 0.7337, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.7427482604980469, + "learning_rate": 2.964744782530777e-05, + "loss": 0.6875, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.8485158681869507, + "learning_rate": 2.955173677376284e-05, + "loss": 0.7086, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.9086769819259644, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6244, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.841145634651184, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5836, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.667880654335022, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5318, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.97666597366333, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4788, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.4563343524932861, + "learning_rate": 2.890567022607206e-05, + "loss": 0.4605, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 2.300769090652466, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4089, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.5398811101913452, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4083, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 2.1584463119506836, + "learning_rate": 2.838778253789822e-05, + "loss": 0.4107, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 1.6772433519363403, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3798, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.5752229690551758, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3594, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.4547830820083618, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3342, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 2.1172640323638916, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3213, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 2.2884950637817383, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3073, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.679686427116394, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.3153, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.6194777488708496, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.2951, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.4156270027160645, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2663, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.6105653047561646, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2765, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.1306759119033813, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2502, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.7829797267913818, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2441, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.488068699836731, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2488, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.3937082290649414, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2391, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.4567296504974365, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2207, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.4297852516174316, + "learning_rate": 2.449165826796448e-05, + "loss": 0.229, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.281473159790039, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2476, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.229278326034546, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1918, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 0.9863131046295166, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1912, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.1746445894241333, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1969, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.2140766382217407, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1848, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.1374038457870483, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.187, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.2810077667236328, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1906, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.018121600151062, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1538, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.1373966932296753, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1705, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.2875229120254517, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1792, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.0193504095077515, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1688, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 0.9890351295471191, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1768, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 1.1963058710098267, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1671, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.7854270339012146, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1635, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 0.7273339033126831, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1568, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.3632380962371826, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1666, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 0.9221144318580627, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1519, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.9728848934173584, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.157, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 1.1193106174468994, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1497, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.8126823306083679, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1553, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.980919599533081, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1458, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.4425220489501953, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1568, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 1.009759783744812, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1386, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.7652599215507507, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1419, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.8194523453712463, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1467, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.7665146589279175, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1423, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.8059810400009155, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1332, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.8315797448158264, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1428, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.6194626688957214, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1411, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.6284136176109314, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1348, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.6207159161567688, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1294, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.7779521942138672, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1281, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.8567638397216797, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1231, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.8884188532829285, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.133, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.7675006985664368, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1361, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.6680415868759155, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1362, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.6427933573722839, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1264, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.560254693031311, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1227, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.5066015720367432, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1285, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.5901069045066833, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1345, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.773160457611084, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1283, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5916489958763123, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1194, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.5266557931900024, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1268, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.6435649991035461, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1192, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.5378548502922058, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1178, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.5611051917076111, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1264, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.6266173720359802, + "learning_rate": 6.237385210498588e-06, + "loss": 0.112, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.7127836346626282, + "learning_rate": 5.901757461493989e-06, + "loss": 0.1229, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.4550264775753021, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1148, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.3910878300666809, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1095, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.5211746096611023, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1154, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.5879091024398804, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1221, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.49942946434020996, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1148, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.5927332043647766, + "learning_rate": 4.044902040769963e-06, + "loss": 0.1128, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.4323960244655609, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.1215, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.48835039138793945, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1159, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.5036460757255554, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1151, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.5331623554229736, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.1137, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.38850846886634827, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1068, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.4269516170024872, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1075, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.5186660289764404, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1131, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.5227987170219421, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1133, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.4650282859802246, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1068, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.42946764826774597, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.1121, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.3922154903411865, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.112, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.4805162847042084, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.1172, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.4081190228462219, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.1191, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.46252021193504333, + "learning_rate": 9.724785471955566e-07, + "loss": 0.1074, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.4496641755104065, + "learning_rate": 8.299636258812199e-07, + "loss": 0.115, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.4630487263202667, + "learning_rate": 6.984393814019885e-07, + "loss": 0.1192, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.4537874162197113, + "learning_rate": 5.780078276432865e-07, + "loss": 0.1106, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.4257529079914093, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.1085, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.4500311613082886, + "learning_rate": 3.707877563706158e-07, + "loss": 0.1082, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.3785618543624878, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.1085, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.4154369831085205, + "learning_rate": 2.089461901495715e-07, + "loss": 0.1161, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.5612080097198486, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.1072, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.5190894603729248, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1046, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.37792685627937317, + "learning_rate": 5.232782089872601e-08, + "loss": 0.1109, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.39401429891586304, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1007, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.5319847464561462, + "learning_rate": 5.817209927129752e-09, + "loss": 0.1099, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.5021974776866406e+17, + "train_loss": 0.3117149337273254, + "train_runtime": 395.5776, + "train_samples_per_second": 47.915, + "train_steps_per_second": 1.502 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5021974776866406e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8720a55092d0af5610259fd533d3bf160ea3cee0 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/1_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:742d4785fca3f932683e0f790321e5edd6c98c85244ac143b5629cb1bd47bce3 +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..41441bc059bb1fecf443b82d8f63a797fb7f2fec --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 2_128_e3_3e-5 + results: [] +--- + + + +# 2_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..48ed742a2790706ed7d8f8b6d3340d9a562ff5c9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "down_proj", + "k_proj", + "o_proj", + "q_proj", + "v_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbeb162b41dc4a27f13a12674d8c9fbe8d620951 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a9995b4e0e5d4d4095e04707ec2675bf3a97828a63d2270347fac1aabb3c31 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..477c4eac062954bf1bb211d7062f4cd351342eed --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8577037903213363e+17, + "train_loss": 0.32085918045605877, + "train_runtime": 397.1595, + "train_samples": 6319, + "train_samples_per_second": 47.731, + "train_steps_per_second": 1.496 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..477c4eac062954bf1bb211d7062f4cd351342eed --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8577037903213363e+17, + "train_loss": 0.32085918045605877, + "train_runtime": 397.1595, + "train_samples": 6319, + "train_samples_per_second": 47.731, + "train_steps_per_second": 1.496 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..266273ee4546619ce37068b649a9fe80883a5f3c --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.4331856966018677, + "learning_rate": 4e-06, + "loss": 1.734, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.5040277242660522, + "learning_rate": 9e-06, + "loss": 1.755, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.3331828117370605, + "learning_rate": 1.4e-05, + "loss": 1.7198, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.8713854551315308, + "learning_rate": 1.9e-05, + "loss": 1.5881, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.0643396377563477, + "learning_rate": 2.4e-05, + "loss": 1.4655, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.5356048345565796, + "learning_rate": 2.9e-05, + "loss": 1.4057, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.5283280611038208, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.366, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.3595463037490845, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.2055, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.4115629196166992, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1105, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.4638973474502563, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0669, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.4839375019073486, + "learning_rate": 2.9866162322321703e-05, + "loss": 1.0518, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 2.0925419330596924, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8974, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.5548053979873657, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8184, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.9603372812271118, + "learning_rate": 2.964744782530777e-05, + "loss": 0.7709, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.6679781675338745, + "learning_rate": 2.955173677376284e-05, + "loss": 0.7926, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 2.3215460777282715, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.7015, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.7579840421676636, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.6685, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.8469500541687012, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.6487, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.7498441934585571, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.5832, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.9129524230957031, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5296, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.5961112976074219, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4922, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 2.236734628677368, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4597, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.6481090784072876, + "learning_rate": 2.838778253789822e-05, + "loss": 0.4265, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 2.1828773021698, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.4125, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.6514759063720703, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.4018, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.5739853382110596, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3444, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.9283863306045532, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3643, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.8200851678848267, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3253, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.757677674293518, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.3634, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.6110481023788452, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3462, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.8130594491958618, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.3246, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.751772403717041, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.3122, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.5467827320098877, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.3088, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.869266152381897, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2576, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.4269520044326782, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2845, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 2.107978582382202, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2697, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.4456874132156372, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2425, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.6220405101776123, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2236, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.3849838972091675, + "learning_rate": 2.416452950136248e-05, + "loss": 0.242, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.1930011510849, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1981, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.2221343517303467, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1995, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.5755507946014404, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1889, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.289252758026123, + "learning_rate": 2.278755057120863e-05, + "loss": 0.2005, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.1153881549835205, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1593, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.248993158340454, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1739, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.2271095514297485, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1822, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.3861188888549805, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1876, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.003029704093933, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1642, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.0499005317687988, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1521, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.4048848152160645, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1679, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 1.0349253416061401, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1619, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.9740352034568787, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1572, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 1.0863126516342163, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1516, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.0735421180725098, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1639, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 1.006008267402649, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1398, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.6826353669166565, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1507, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.9635311961174011, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1432, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.9691417217254639, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1488, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.8336058855056763, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1407, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.028816819190979, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1309, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 1.5329989194869995, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1443, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 1.3084213733673096, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1384, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.8160310387611389, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.14, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 1.0572855472564697, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1412, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.9060344696044922, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1278, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.8129478693008423, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1293, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.7587897181510925, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1237, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.9445025324821472, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1329, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.7635170817375183, + "learning_rate": 1.234043966149462e-05, + "loss": 0.126, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.8804559707641602, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1204, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.8054929375648499, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1197, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.7191224098205566, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1405, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.8693550229072571, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1241, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.6935634016990662, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1243, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 1.188002347946167, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1192, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.5204651355743408, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1079, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.8785454630851746, + "learning_rate": 9.144160872547579e-06, + "loss": 0.12, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.7824288606643677, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1147, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.7382746934890747, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1059, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.7064906358718872, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1245, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.7076740264892578, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1081, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.6188404560089111, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1061, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.5379190444946289, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1105, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.4566333293914795, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1051, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.730892539024353, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1087, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.6480405330657959, + "learning_rate": 5.901757461493989e-06, + "loss": 0.1072, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.7346988320350647, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1004, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.44476959109306335, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1016, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.35098615288734436, + "learning_rate": 4.938229047911652e-06, + "loss": 0.0937, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.5324970483779907, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1058, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.5353461503982544, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1018, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.561500608921051, + "learning_rate": 4.044902040769963e-06, + "loss": 0.0973, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.5657665729522705, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.1014, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.7442361116409302, + "learning_rate": 3.491450893410134e-06, + "loss": 0.0987, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.3957414925098419, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.0925, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.47006043791770935, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.0945, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.558597981929779, + "learning_rate": 2.728715183008864e-06, + "loss": 0.0998, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.8346509337425232, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.0901, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.4225305914878845, + "learning_rate": 2.267485570730894e-06, + "loss": 0.0949, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.47042638063430786, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1001, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.5002344846725464, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.0969, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.6675495505332947, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0991, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.7064018249511719, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0942, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.42286184430122375, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0948, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.42062777280807495, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0968, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.48490676283836365, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0973, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.47936156392097473, + "learning_rate": 8.299636258812199e-07, + "loss": 0.0962, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.5494322180747986, + "learning_rate": 6.984393814019885e-07, + "loss": 0.0983, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.4665779769420624, + "learning_rate": 5.780078276432865e-07, + "loss": 0.0968, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.41104453802108765, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0924, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.5705447793006897, + "learning_rate": 3.707877563706158e-07, + "loss": 0.0981, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.4556790292263031, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0996, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.5352632403373718, + "learning_rate": 2.089461901495715e-07, + "loss": 0.0955, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.3622061610221863, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0929, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.5170547962188721, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1018, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.4528382420539856, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0949, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.48260003328323364, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1011, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.5233713984489441, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0936, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.8577037903213363e+17, + "train_loss": 0.32085918045605877, + "train_runtime": 397.1595, + "train_samples_per_second": 47.731, + "train_steps_per_second": 1.496 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8577037903213363e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd15aed7f9f95fa2199252427f1c60d6aec9c4d9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/2_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f04ad09ea242d5c349444e331c16a64e4fe788335b2a533ecece1037dd08ff +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..35d927addc1c491abb6448078f3684a41347a045 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 3_128_e3_3e-5 + results: [] +--- + + + +# 3_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5161547cfc9015e265ca39e51fcad6355f469f95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "v_proj", + "up_proj", + "q_proj", + "o_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..043f01df498af633310633993338020940ec9255 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b12e2070b4401b6cfa1848280f7240883d5d5b80fa534460741fac7aea97487a +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d1a5ff7442ed2d2e4da91b2a2334874f188d2841 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7093599182402355e+17, + "train_loss": 0.30828643221445756, + "train_runtime": 389.622, + "train_samples": 6314, + "train_samples_per_second": 48.616, + "train_steps_per_second": 1.525 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d1a5ff7442ed2d2e4da91b2a2334874f188d2841 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7093599182402355e+17, + "train_loss": 0.30828643221445756, + "train_runtime": 389.622, + "train_samples": 6314, + "train_samples_per_second": 48.616, + "train_steps_per_second": 1.525 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0bdf5a6d2cd6f55ac31942aff02f0132a4af991c --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.619051218032837, + "learning_rate": 4e-06, + "loss": 1.7388, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.5689483880996704, + "learning_rate": 9e-06, + "loss": 1.7134, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.136069893836975, + "learning_rate": 1.4e-05, + "loss": 1.6612, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.6846541166305542, + "learning_rate": 1.9e-05, + "loss": 1.5529, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.251536250114441, + "learning_rate": 2.4e-05, + "loss": 1.4725, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.530225396156311, + "learning_rate": 2.9e-05, + "loss": 1.3776, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.4737138748168945, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2222, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.3369853496551514, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1569, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.4490716457366943, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.0968, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.3733012676239014, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0386, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.6092872619628906, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.927, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.5804064273834229, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8276, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.747194766998291, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8009, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.7942897081375122, + "learning_rate": 2.964744782530777e-05, + "loss": 0.758, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.8037140369415283, + "learning_rate": 2.955173677376284e-05, + "loss": 0.6856, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 2.227295160293579, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6129, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.739098072052002, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5476, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.953800082206726, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5399, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.7105361223220825, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4941, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 2.1508584022521973, + "learning_rate": 2.890567022607206e-05, + "loss": 0.4508, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.6763601303100586, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.5162, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 2.2208757400512695, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4161, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.3967549800872803, + "learning_rate": 2.838778253789822e-05, + "loss": 0.4168, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 1.697741985321045, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3691, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.4547032117843628, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3523, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.494426965713501, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3463, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.90861177444458, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3364, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.4092456102371216, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3044, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.7048689126968384, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.2987, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.9330724477767944, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3191, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.4210857152938843, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2761, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.6646480560302734, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2376, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.3003021478652954, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2643, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.2075560092926025, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2313, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.3828257322311401, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2449, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.49900484085083, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2325, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.4820173978805542, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2457, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.0645232200622559, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2074, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.3796106576919556, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2079, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.3384943008422852, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1881, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.1452109813690186, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1842, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.056641697883606, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1748, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.319148302078247, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1678, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.1153067350387573, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1736, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.168117642402649, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1762, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.135085940361023, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1703, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.1386165618896484, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1652, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.0137522220611572, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.156, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.0528894662857056, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1822, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.05873441696167, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1975, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 1.054236650466919, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1763, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.895095705986023, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1672, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 1.104076623916626, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1503, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.0975689888000488, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1591, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 1.1713941097259521, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1475, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.8552865982055664, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1386, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.8676689863204956, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1812, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.9631779193878174, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1511, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.904202938079834, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1726, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 0.7408406138420105, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1418, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.9052717089653015, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1388, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.7882755398750305, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1359, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.8167088031768799, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1492, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.8505122065544128, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1354, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.9417012333869934, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.127, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.8338747620582581, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1337, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.9467313885688782, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1363, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.7521552443504333, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1257, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.5861857533454895, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1281, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.8988696336746216, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1371, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.6483206748962402, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.124, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.8047576546669006, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1243, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.5963404774665833, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1237, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.7981751561164856, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1213, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.6747472882270813, + "learning_rate": 9.922075858704368e-06, + "loss": 0.134, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.4369167685508728, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1233, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.5219217538833618, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1183, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.6108366250991821, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1279, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.8099241256713867, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1212, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5935463905334473, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1066, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.6568154692649841, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1078, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.506991446018219, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1132, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.5020112991333008, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1082, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.6010819673538208, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1161, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.4680786430835724, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1104, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.5004385113716125, + "learning_rate": 5.901757461493989e-06, + "loss": 0.111, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.5595095157623291, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1158, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.5922601222991943, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1142, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.664776086807251, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1075, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.4299693703651428, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1077, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.5962541103363037, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1065, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.5006476044654846, + "learning_rate": 4.044902040769963e-06, + "loss": 0.0994, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.39104586839675903, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.0975, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.39948374032974243, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1083, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.3959241509437561, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1013, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.38376688957214355, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.0961, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.6427478790283203, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1058, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.5430572628974915, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1104, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.5367764830589294, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1058, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.4615514278411865, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1042, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.5757582783699036, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1034, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.42835476994514465, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0977, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.3667435050010681, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0974, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.4710931181907654, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.1056, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.4284909963607788, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0985, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.4355495572090149, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0999, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.4817695915699005, + "learning_rate": 8.299636258812199e-07, + "loss": 0.1057, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.38936957716941833, + "learning_rate": 6.984393814019885e-07, + "loss": 0.0997, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.44348078966140747, + "learning_rate": 5.780078276432865e-07, + "loss": 0.1016, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.4160664975643158, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0981, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.5656613707542419, + "learning_rate": 3.707877563706158e-07, + "loss": 0.1088, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.3931078016757965, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0959, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.5129686594009399, + "learning_rate": 2.089461901495715e-07, + "loss": 0.0945, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.542986273765564, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0981, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.40076467394828796, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1018, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.4306162893772125, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0979, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.5417938232421875, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.101, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.43664395809173584, + "learning_rate": 5.817209927129752e-09, + "loss": 0.1003, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.7093599182402355e+17, + "train_loss": 0.30828643221445756, + "train_runtime": 389.622, + "train_samples_per_second": 48.616, + "train_steps_per_second": 1.525 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7093599182402355e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8dad1dddeabd825cf048246d3eebe79cfb23b4d --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/3_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ce69cc8b080d9e95850c9db9add4effac8bd0e4356b57dec84e12d5e6eaeff +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b8aadc0276bb51456f871680de69fc22fe01453 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 4_128_e3_3e-5 + results: [] +--- + + + +# 4_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef47048cf33543d4ae71a1f122bbd655b50d8731 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "gate_proj", + "up_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c94941e90f399400b0efaa106d31772dd3973eb --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2298dc113354edad37fad16e878ab87e5a9d06f2a760a676acc50b2b4aa2e964 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f38227f74c58bde67607573941560ff6ec802bab --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7976253992245658e+17, + "train_loss": 0.3052744480094524, + "train_runtime": 397.9397, + "train_samples": 6308, + "train_samples_per_second": 47.555, + "train_steps_per_second": 1.493 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f38227f74c58bde67607573941560ff6ec802bab --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7976253992245658e+17, + "train_loss": 0.3052744480094524, + "train_runtime": 397.9397, + "train_samples": 6308, + "train_samples_per_second": 47.555, + "train_steps_per_second": 1.493 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..06f1875706a566b2a345bc7cf3724f9ee3cf5eca --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.025348542458808618, + "grad_norm": 1.2597395181655884, + "learning_rate": 4e-06, + "loss": 1.73, + "step": 5 + }, + { + "epoch": 0.050697084917617236, + "grad_norm": 1.6612614393234253, + "learning_rate": 9e-06, + "loss": 1.7184, + "step": 10 + }, + { + "epoch": 0.07604562737642585, + "grad_norm": 1.7417309284210205, + "learning_rate": 1.4e-05, + "loss": 1.6297, + "step": 15 + }, + { + "epoch": 0.10139416983523447, + "grad_norm": 1.449503779411316, + "learning_rate": 1.9e-05, + "loss": 1.4448, + "step": 20 + }, + { + "epoch": 0.1267427122940431, + "grad_norm": 1.5252962112426758, + "learning_rate": 2.4e-05, + "loss": 1.3421, + "step": 25 + }, + { + "epoch": 0.1520912547528517, + "grad_norm": 1.066590428352356, + "learning_rate": 2.9e-05, + "loss": 1.3987, + "step": 30 + }, + { + "epoch": 0.17743979721166034, + "grad_norm": 1.6751725673675537, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2445, + "step": 35 + }, + { + "epoch": 0.20278833967046894, + "grad_norm": 1.5106178522109985, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1866, + "step": 40 + }, + { + "epoch": 0.22813688212927757, + "grad_norm": 1.5768376588821411, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.0563, + "step": 45 + }, + { + "epoch": 0.2534854245880862, + "grad_norm": 1.4491944313049316, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0099, + "step": 50 + }, + { + "epoch": 0.2788339670468948, + "grad_norm": 1.619663953781128, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.9386, + "step": 55 + }, + { + "epoch": 0.3041825095057034, + "grad_norm": 1.7093677520751953, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8507, + "step": 60 + }, + { + "epoch": 0.32953105196451205, + "grad_norm": 1.4354573488235474, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8155, + "step": 65 + }, + { + "epoch": 0.3548795944233207, + "grad_norm": 1.754475712776184, + "learning_rate": 2.964744782530777e-05, + "loss": 0.6775, + "step": 70 + }, + { + "epoch": 0.38022813688212925, + "grad_norm": 1.7561532258987427, + "learning_rate": 2.955173677376284e-05, + "loss": 0.6976, + "step": 75 + }, + { + "epoch": 0.4055766793409379, + "grad_norm": 1.7935422658920288, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.7002, + "step": 80 + }, + { + "epoch": 0.4309252217997465, + "grad_norm": 2.198117733001709, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.6236, + "step": 85 + }, + { + "epoch": 0.45627376425855515, + "grad_norm": 1.7154661417007446, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5202, + "step": 90 + }, + { + "epoch": 0.4816223067173637, + "grad_norm": 2.0734901428222656, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4914, + "step": 95 + }, + { + "epoch": 0.5069708491761724, + "grad_norm": 1.9814143180847168, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5209, + "step": 100 + }, + { + "epoch": 0.532319391634981, + "grad_norm": 1.6074522733688354, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4599, + "step": 105 + }, + { + "epoch": 0.5576679340937896, + "grad_norm": 1.6758123636245728, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4436, + "step": 110 + }, + { + "epoch": 0.5830164765525983, + "grad_norm": 1.5953923463821411, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3927, + "step": 115 + }, + { + "epoch": 0.6083650190114068, + "grad_norm": 1.8132802248001099, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3811, + "step": 120 + }, + { + "epoch": 0.6337135614702155, + "grad_norm": 2.00042724609375, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3945, + "step": 125 + }, + { + "epoch": 0.6590621039290241, + "grad_norm": 1.5519765615463257, + "learning_rate": 2.777648771814114e-05, + "loss": 0.359, + "step": 130 + }, + { + "epoch": 0.6844106463878327, + "grad_norm": 1.7856067419052124, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3179, + "step": 135 + }, + { + "epoch": 0.7097591888466414, + "grad_norm": 1.6971591711044312, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3344, + "step": 140 + }, + { + "epoch": 0.7351077313054499, + "grad_norm": 2.0304582118988037, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.319, + "step": 145 + }, + { + "epoch": 0.7604562737642585, + "grad_norm": 1.744016408920288, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3051, + "step": 150 + }, + { + "epoch": 0.7858048162230672, + "grad_norm": 1.289427638053894, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.278, + "step": 155 + }, + { + "epoch": 0.8111533586818758, + "grad_norm": 1.5661474466323853, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2825, + "step": 160 + }, + { + "epoch": 0.8365019011406845, + "grad_norm": 1.3710689544677734, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.25, + "step": 165 + }, + { + "epoch": 0.861850443599493, + "grad_norm": 1.594046711921692, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2774, + "step": 170 + }, + { + "epoch": 0.8871989860583016, + "grad_norm": 1.4260838031768799, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2478, + "step": 175 + }, + { + "epoch": 0.9125475285171103, + "grad_norm": 1.7486281394958496, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2539, + "step": 180 + }, + { + "epoch": 0.9378960709759189, + "grad_norm": 1.306111454963684, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2307, + "step": 185 + }, + { + "epoch": 0.9632446134347274, + "grad_norm": 1.151717185974121, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2287, + "step": 190 + }, + { + "epoch": 0.9885931558935361, + "grad_norm": 1.485127329826355, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2339, + "step": 195 + }, + { + "epoch": 1.0101394169835234, + "grad_norm": 1.261560320854187, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.206, + "step": 200 + }, + { + "epoch": 1.035487959442332, + "grad_norm": 1.2174924612045288, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1937, + "step": 205 + }, + { + "epoch": 1.0608365019011408, + "grad_norm": 1.2362275123596191, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1803, + "step": 210 + }, + { + "epoch": 1.0861850443599492, + "grad_norm": 1.3199458122253418, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1954, + "step": 215 + }, + { + "epoch": 1.111533586818758, + "grad_norm": 1.0054856538772583, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.169, + "step": 220 + }, + { + "epoch": 1.1368821292775666, + "grad_norm": 1.4963363409042358, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1888, + "step": 225 + }, + { + "epoch": 1.162230671736375, + "grad_norm": 1.2480735778808594, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1694, + "step": 230 + }, + { + "epoch": 1.1875792141951838, + "grad_norm": 1.1447769403457642, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.175, + "step": 235 + }, + { + "epoch": 1.2129277566539924, + "grad_norm": 1.0861809253692627, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1541, + "step": 240 + }, + { + "epoch": 1.2382762991128011, + "grad_norm": 1.316166877746582, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1484, + "step": 245 + }, + { + "epoch": 1.2636248415716096, + "grad_norm": 1.0434430837631226, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1504, + "step": 250 + }, + { + "epoch": 1.2889733840304183, + "grad_norm": 1.246682047843933, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1597, + "step": 255 + }, + { + "epoch": 1.3143219264892267, + "grad_norm": 1.014186978340149, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1487, + "step": 260 + }, + { + "epoch": 1.3396704689480354, + "grad_norm": 1.0270503759384155, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1525, + "step": 265 + }, + { + "epoch": 1.3650190114068441, + "grad_norm": 1.1792865991592407, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1525, + "step": 270 + }, + { + "epoch": 1.3903675538656528, + "grad_norm": 1.2939376831054688, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1526, + "step": 275 + }, + { + "epoch": 1.4157160963244613, + "grad_norm": 1.113114833831787, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1381, + "step": 280 + }, + { + "epoch": 1.44106463878327, + "grad_norm": 0.8477252125740051, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1436, + "step": 285 + }, + { + "epoch": 1.4664131812420786, + "grad_norm": 0.8423115611076355, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1417, + "step": 290 + }, + { + "epoch": 1.491761723700887, + "grad_norm": 1.1133636236190796, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1384, + "step": 295 + }, + { + "epoch": 1.5171102661596958, + "grad_norm": 0.7628555297851562, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1315, + "step": 300 + }, + { + "epoch": 1.5424588086185045, + "grad_norm": 0.8875032067298889, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1445, + "step": 305 + }, + { + "epoch": 1.5678073510773132, + "grad_norm": 0.7518343925476074, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1355, + "step": 310 + }, + { + "epoch": 1.5931558935361216, + "grad_norm": 0.8724908828735352, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1293, + "step": 315 + }, + { + "epoch": 1.6185044359949303, + "grad_norm": 0.9677815437316895, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.125, + "step": 320 + }, + { + "epoch": 1.6438529784537388, + "grad_norm": 1.261006236076355, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1262, + "step": 325 + }, + { + "epoch": 1.6692015209125475, + "grad_norm": 0.7328829765319824, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1231, + "step": 330 + }, + { + "epoch": 1.6945500633713562, + "grad_norm": 0.9738350510597229, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1307, + "step": 335 + }, + { + "epoch": 1.7198986058301649, + "grad_norm": 0.7044712901115417, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1174, + "step": 340 + }, + { + "epoch": 1.7452471482889735, + "grad_norm": 0.8294494152069092, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1287, + "step": 345 + }, + { + "epoch": 1.770595690747782, + "grad_norm": 1.1017963886260986, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1289, + "step": 350 + }, + { + "epoch": 1.7959442332065905, + "grad_norm": 0.6644359230995178, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1283, + "step": 355 + }, + { + "epoch": 1.8212927756653992, + "grad_norm": 1.125322937965393, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1244, + "step": 360 + }, + { + "epoch": 1.8466413181242078, + "grad_norm": 0.6556974649429321, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1188, + "step": 365 + }, + { + "epoch": 1.8719898605830165, + "grad_norm": 0.5250052809715271, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1099, + "step": 370 + }, + { + "epoch": 1.8973384030418252, + "grad_norm": 0.6481403112411499, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1257, + "step": 375 + }, + { + "epoch": 1.9226869455006337, + "grad_norm": 0.5517955422401428, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1069, + "step": 380 + }, + { + "epoch": 1.9480354879594424, + "grad_norm": 0.5799809694290161, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1203, + "step": 385 + }, + { + "epoch": 1.9733840304182508, + "grad_norm": 0.7844173312187195, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1064, + "step": 390 + }, + { + "epoch": 1.9987325728770595, + "grad_norm": 0.6437531113624573, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1099, + "step": 395 + }, + { + "epoch": 2.0202788339670468, + "grad_norm": 0.44110915064811707, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1179, + "step": 400 + }, + { + "epoch": 2.0456273764258555, + "grad_norm": 0.5330139994621277, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1077, + "step": 405 + }, + { + "epoch": 2.070975918884664, + "grad_norm": 0.507890522480011, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.0973, + "step": 410 + }, + { + "epoch": 2.096324461343473, + "grad_norm": 0.49595609307289124, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1041, + "step": 415 + }, + { + "epoch": 2.1216730038022815, + "grad_norm": 0.6541264057159424, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1043, + "step": 420 + }, + { + "epoch": 2.14702154626109, + "grad_norm": 0.5779265761375427, + "learning_rate": 6.237385210498588e-06, + "loss": 0.108, + "step": 425 + }, + { + "epoch": 2.1723700887198985, + "grad_norm": 0.5817006826400757, + "learning_rate": 5.901757461493989e-06, + "loss": 0.0997, + "step": 430 + }, + { + "epoch": 2.197718631178707, + "grad_norm": 0.5166028141975403, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1059, + "step": 435 + }, + { + "epoch": 2.223067173637516, + "grad_norm": 0.5276929140090942, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1068, + "step": 440 + }, + { + "epoch": 2.2484157160963245, + "grad_norm": 0.6705610752105713, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1065, + "step": 445 + }, + { + "epoch": 2.273764258555133, + "grad_norm": 0.42125841975212097, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1004, + "step": 450 + }, + { + "epoch": 2.299112801013942, + "grad_norm": 0.5841954350471497, + "learning_rate": 4.334482243338589e-06, + "loss": 0.096, + "step": 455 + }, + { + "epoch": 2.32446134347275, + "grad_norm": 0.5501387119293213, + "learning_rate": 4.044902040769963e-06, + "loss": 0.096, + "step": 460 + }, + { + "epoch": 2.349809885931559, + "grad_norm": 0.4102640151977539, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.0967, + "step": 465 + }, + { + "epoch": 2.3751584283903675, + "grad_norm": 0.5982562303543091, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1061, + "step": 470 + }, + { + "epoch": 2.400506970849176, + "grad_norm": 0.4171775281429291, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1053, + "step": 475 + }, + { + "epoch": 2.425855513307985, + "grad_norm": 0.5157179236412048, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.0981, + "step": 480 + }, + { + "epoch": 2.4512040557667936, + "grad_norm": 0.4761216640472412, + "learning_rate": 2.728715183008864e-06, + "loss": 0.0985, + "step": 485 + }, + { + "epoch": 2.4765525982256023, + "grad_norm": 0.5032146573066711, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1013, + "step": 490 + }, + { + "epoch": 2.5019011406844105, + "grad_norm": 0.4852380156517029, + "learning_rate": 2.267485570730894e-06, + "loss": 0.0963, + "step": 495 + }, + { + "epoch": 2.527249683143219, + "grad_norm": 0.47207698225975037, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.0975, + "step": 500 + }, + { + "epoch": 2.552598225602028, + "grad_norm": 0.4765535295009613, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1004, + "step": 505 + }, + { + "epoch": 2.5779467680608366, + "grad_norm": 0.624397873878479, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0978, + "step": 510 + }, + { + "epoch": 2.6032953105196452, + "grad_norm": 0.4087825119495392, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0934, + "step": 515 + }, + { + "epoch": 2.6286438529784535, + "grad_norm": 0.5359424948692322, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0929, + "step": 520 + }, + { + "epoch": 2.653992395437262, + "grad_norm": 0.6894325017929077, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.1036, + "step": 525 + }, + { + "epoch": 2.679340937896071, + "grad_norm": 0.5686900615692139, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0935, + "step": 530 + }, + { + "epoch": 2.7046894803548795, + "grad_norm": 0.44335612654685974, + "learning_rate": 8.299636258812199e-07, + "loss": 0.0951, + "step": 535 + }, + { + "epoch": 2.7300380228136882, + "grad_norm": 0.4200829267501831, + "learning_rate": 6.984393814019885e-07, + "loss": 0.09, + "step": 540 + }, + { + "epoch": 2.755386565272497, + "grad_norm": 0.47023630142211914, + "learning_rate": 5.780078276432865e-07, + "loss": 0.1029, + "step": 545 + }, + { + "epoch": 2.7807351077313056, + "grad_norm": 0.43258345127105713, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.099, + "step": 550 + }, + { + "epoch": 2.8060836501901143, + "grad_norm": 0.48954030871391296, + "learning_rate": 3.707877563706158e-07, + "loss": 0.1, + "step": 555 + }, + { + "epoch": 2.8314321926489225, + "grad_norm": 0.4546068012714386, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0963, + "step": 560 + }, + { + "epoch": 2.8567807351077312, + "grad_norm": 0.39951595664024353, + "learning_rate": 2.089461901495715e-07, + "loss": 0.0964, + "step": 565 + }, + { + "epoch": 2.88212927756654, + "grad_norm": 0.4355326294898987, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0905, + "step": 570 + }, + { + "epoch": 2.9074778200253486, + "grad_norm": 0.4281630516052246, + "learning_rate": 9.298514701147898e-08, + "loss": 0.0848, + "step": 575 + }, + { + "epoch": 2.9328263624841573, + "grad_norm": 0.4236002266407013, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0902, + "step": 580 + }, + { + "epoch": 2.9581749049429655, + "grad_norm": 0.40398433804512024, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.0948, + "step": 585 + }, + { + "epoch": 2.983523447401774, + "grad_norm": 0.4113132953643799, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0921, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.7976253992245658e+17, + "train_loss": 0.3052744480094524, + "train_runtime": 397.9397, + "train_samples_per_second": 47.555, + "train_steps_per_second": 1.493 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7976253992245658e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bde7fe595712be989eb8d024fda2947c189c441 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/4_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ed87927856b5dd52b97391e9df9c58344e969af5f341e52da7d4e0508a87a5f +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..59a12dac6aca7a0abe7df37e7b15266c98e1e6d8 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 5_128_e3_3e-5 + results: [] +--- + + + +# 5_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9624acc59e318fcc80482a3fa1435715d9fd09e1 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "k_proj", + "q_proj", + "o_proj", + "down_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4cf289a5579894352b5f640bd924124f36501481 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9e8fcaf952b700adbe6fd1e51e120fb6141655cd6beaa9919da69b06aeef15 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f1786b6907a3a3d33fb2b8c1c873f105a9bbc630 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.737476473356288e+17, + "train_loss": 0.3053576248864132, + "train_runtime": 390.8265, + "train_samples": 6318, + "train_samples_per_second": 48.497, + "train_steps_per_second": 1.52 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f1786b6907a3a3d33fb2b8c1c873f105a9bbc630 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.737476473356288e+17, + "train_loss": 0.3053576248864132, + "train_runtime": 390.8265, + "train_samples": 6318, + "train_samples_per_second": 48.497, + "train_steps_per_second": 1.52 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..abcfbf914fbb16b34096202ab594042a2e2f43d6 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.483224630355835, + "learning_rate": 4e-06, + "loss": 1.7982, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.3512424230575562, + "learning_rate": 9e-06, + "loss": 1.6616, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.3547309637069702, + "learning_rate": 1.4e-05, + "loss": 1.6728, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.2131034135818481, + "learning_rate": 1.9e-05, + "loss": 1.5593, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.4468414783477783, + "learning_rate": 2.4e-05, + "loss": 1.533, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.2515822649002075, + "learning_rate": 2.9e-05, + "loss": 1.3821, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.6640502214431763, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2387, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.4368033409118652, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1673, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.5038846731185913, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1796, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.4365618228912354, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0269, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.9087409973144531, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.9782, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.9496780633926392, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8567, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.5718514919281006, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8184, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 2.074193239212036, + "learning_rate": 2.964744782530777e-05, + "loss": 0.6993, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.735181212425232, + "learning_rate": 2.955173677376284e-05, + "loss": 0.7474, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.7464500665664673, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6379, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.7384036779403687, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.6092, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.5950777530670166, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5256, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.9247709512710571, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4881, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.660312294960022, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5094, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 2.0594310760498047, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4329, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.4698081016540527, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4118, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.9151757955551147, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3928, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 1.7078217267990112, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3641, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.5883245468139648, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3619, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.3476817607879639, + "learning_rate": 2.777648771814114e-05, + "loss": 0.2987, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.5766232013702393, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3062, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 2.1069650650024414, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.331, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.57357656955719, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.2936, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.501038670539856, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.2768, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.7691450119018555, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2978, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.3485534191131592, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2722, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.743384838104248, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2399, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.9562958478927612, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2426, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.4701511859893799, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2328, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.4795855283737183, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2318, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.369144320487976, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.234, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.5799957513809204, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2116, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.2948460578918457, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2251, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.6121970415115356, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1888, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.2204859256744385, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1775, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.7024487257003784, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.2023, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.1096378564834595, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1633, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.2814828157424927, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1741, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.1782859563827515, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1684, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.0942440032958984, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1477, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.34474778175354, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1511, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.454332947731018, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1649, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.3552030324935913, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1707, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 0.9466403722763062, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1615, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 1.1465818881988525, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1564, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 1.0673530101776123, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1435, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 0.9664721488952637, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1536, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.0588293075561523, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1492, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 0.8707783818244934, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1462, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 1.157206654548645, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1423, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 1.127726435661316, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1393, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 1.0298659801483154, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1432, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 1.0723909139633179, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1395, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.2457947731018066, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.14, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.9470780491828918, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1315, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 1.054225206375122, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1271, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.9454329609870911, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1313, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.8693910241127014, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1254, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.9612795114517212, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1242, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.6711055636405945, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1212, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.8688486814498901, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1299, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.9115237593650818, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.122, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.6805409789085388, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1216, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.9460160136222839, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1219, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.8223559260368347, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1231, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 1.286515235900879, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.116, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.7976328730583191, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1156, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 1.09882652759552, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1194, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.7624656558036804, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1085, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.5423561334609985, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1152, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.6010194420814514, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1128, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.6903271675109863, + "learning_rate": 8.761866287946955e-06, + "loss": 0.11, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.8779557347297668, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1061, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5660829544067383, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1024, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.6046552062034607, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1012, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.6035295724868774, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1058, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.5582605004310608, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1047, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.5027115345001221, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1033, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.5051896572113037, + "learning_rate": 6.237385210498588e-06, + "loss": 0.0944, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.5589889883995056, + "learning_rate": 5.901757461493989e-06, + "loss": 0.1046, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.469156950712204, + "learning_rate": 5.573186564064649e-06, + "loss": 0.0971, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.5502706170082092, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1042, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.4997517466545105, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1079, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.4469977617263794, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1124, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.5012600421905518, + "learning_rate": 4.334482243338589e-06, + "loss": 0.093, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.45672786235809326, + "learning_rate": 4.044902040769963e-06, + "loss": 0.1002, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.4676864445209503, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.0963, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.5044505000114441, + "learning_rate": 3.491450893410134e-06, + "loss": 0.0991, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.5793904662132263, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.101, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.614927351474762, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.0937, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.4913713037967682, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1002, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.42625540494918823, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.094, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.4231247305870056, + "learning_rate": 2.267485570730894e-06, + "loss": 0.097, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.4993528425693512, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.0925, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.4112917482852936, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.0977, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.52168869972229, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.1066, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.48104792833328247, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0993, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.4378977417945862, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0973, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.4554797112941742, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0918, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.5086758136749268, + "learning_rate": 9.724785471955566e-07, + "loss": 0.1005, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.5449450612068176, + "learning_rate": 8.299636258812199e-07, + "loss": 0.0962, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.42181727290153503, + "learning_rate": 6.984393814019885e-07, + "loss": 0.0942, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.5065450072288513, + "learning_rate": 5.780078276432865e-07, + "loss": 0.0985, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.482354998588562, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0941, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.42983222007751465, + "learning_rate": 3.707877563706158e-07, + "loss": 0.094, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.48284855484962463, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0978, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.4915887415409088, + "learning_rate": 2.089461901495715e-07, + "loss": 0.1024, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.4611974358558655, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.1012, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.5482293963432312, + "learning_rate": 9.298514701147898e-08, + "loss": 0.0926, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.4771062135696411, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0964, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.40679630637168884, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.096, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.5164175629615784, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0914, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.737476473356288e+17, + "train_loss": 0.3053576248864132, + "train_runtime": 390.8265, + "train_samples_per_second": 48.497, + "train_steps_per_second": 1.52 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.737476473356288e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ceede62982126b3c904515572db3d441faf40bc8 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/5_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1fb56d1201501417f56883e7f1385accc644fd94480052e68115706e7114a5 +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3f0577ce1b70f6430faf7d61aba513c9f3a43c7b --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 6_128_e3_3e-5 + results: [] +--- + + + +# 6_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e80c41a456286140c5c4e162f371e9fd25f07d9f --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "o_proj", + "q_proj", + "down_proj", + "gate_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c167524469801bcb0d1c93fde56b6d61df64a20 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09d071a0f1783b21e97d99ac7e06271e98fedce2e7b5039189bd396e82a68e3 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..515b1e879bd39f1675e47df72b9771884f8b0c94 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.760462524949463e+17, + "train_loss": 0.3074202121951472, + "train_runtime": 398.1856, + "train_samples": 6348, + "train_samples_per_second": 47.827, + "train_steps_per_second": 1.499 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..515b1e879bd39f1675e47df72b9771884f8b0c94 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.760462524949463e+17, + "train_loss": 0.3074202121951472, + "train_runtime": 398.1856, + "train_samples": 6348, + "train_samples_per_second": 47.827, + "train_steps_per_second": 1.499 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef70ef46ce5dd51d55315e740bd578da3275d3ce --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/trainer_state.json @@ -0,0 +1,876 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 597, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02518891687657431, + "grad_norm": 1.2221956253051758, + "learning_rate": 4e-06, + "loss": 1.7722, + "step": 5 + }, + { + "epoch": 0.05037783375314862, + "grad_norm": 1.4741709232330322, + "learning_rate": 9e-06, + "loss": 1.7735, + "step": 10 + }, + { + "epoch": 0.07556675062972293, + "grad_norm": 1.3625484704971313, + "learning_rate": 1.4e-05, + "loss": 1.6468, + "step": 15 + }, + { + "epoch": 0.10075566750629723, + "grad_norm": 1.3311086893081665, + "learning_rate": 1.9e-05, + "loss": 1.5045, + "step": 20 + }, + { + "epoch": 0.12594458438287154, + "grad_norm": 1.2504208087921143, + "learning_rate": 2.4e-05, + "loss": 1.4341, + "step": 25 + }, + { + "epoch": 0.15113350125944586, + "grad_norm": 1.3425238132476807, + "learning_rate": 2.9e-05, + "loss": 1.2989, + "step": 30 + }, + { + "epoch": 0.17632241813602015, + "grad_norm": 1.3402578830718994, + "learning_rate": 2.9996316191067322e-05, + "loss": 1.2727, + "step": 35 + }, + { + "epoch": 0.20151133501259447, + "grad_norm": 1.158701777458191, + "learning_rate": 2.9981353818283835e-05, + "loss": 1.1433, + "step": 40 + }, + { + "epoch": 0.22670025188916876, + "grad_norm": 1.2656117677688599, + "learning_rate": 2.995489411751688e-05, + "loss": 1.0689, + "step": 45 + }, + { + "epoch": 0.2518891687657431, + "grad_norm": 1.8802189826965332, + "learning_rate": 2.9916957395065996e-05, + "loss": 0.9807, + "step": 50 + }, + { + "epoch": 0.2770780856423174, + "grad_norm": 1.335022211074829, + "learning_rate": 2.9867572765185192e-05, + "loss": 0.9382, + "step": 55 + }, + { + "epoch": 0.3022670025188917, + "grad_norm": 1.6188322305679321, + "learning_rate": 2.9806778127739467e-05, + "loss": 0.8862, + "step": 60 + }, + { + "epoch": 0.327455919395466, + "grad_norm": 2.044724941253662, + "learning_rate": 2.9734620139118812e-05, + "loss": 0.7866, + "step": 65 + }, + { + "epoch": 0.3526448362720403, + "grad_norm": 1.6731153726577759, + "learning_rate": 2.965115417643212e-05, + "loss": 0.7492, + "step": 70 + }, + { + "epoch": 0.3778337531486146, + "grad_norm": 2.1785879135131836, + "learning_rate": 2.9556444295008444e-05, + "loss": 0.6894, + "step": 75 + }, + { + "epoch": 0.40302267002518893, + "grad_norm": 1.5471223592758179, + "learning_rate": 2.9450563179238207e-05, + "loss": 0.6732, + "step": 80 + }, + { + "epoch": 0.4282115869017632, + "grad_norm": 1.8786218166351318, + "learning_rate": 2.9333592086792113e-05, + "loss": 0.6336, + "step": 85 + }, + { + "epoch": 0.4534005037783375, + "grad_norm": 1.7442162036895752, + "learning_rate": 2.920562078626055e-05, + "loss": 0.5751, + "step": 90 + }, + { + "epoch": 0.47858942065491183, + "grad_norm": 1.6938061714172363, + "learning_rate": 2.9066747488261378e-05, + "loss": 0.5065, + "step": 95 + }, + { + "epoch": 0.5037783375314862, + "grad_norm": 1.7415169477462769, + "learning_rate": 2.8917078770068882e-05, + "loss": 0.4766, + "step": 100 + }, + { + "epoch": 0.5289672544080605, + "grad_norm": 1.7173844575881958, + "learning_rate": 2.8756729493821883e-05, + "loss": 0.4418, + "step": 105 + }, + { + "epoch": 0.5541561712846348, + "grad_norm": 1.9288567304611206, + "learning_rate": 2.8585822718373623e-05, + "loss": 0.4331, + "step": 110 + }, + { + "epoch": 0.5793450881612091, + "grad_norm": 1.5396711826324463, + "learning_rate": 2.8404489604851186e-05, + "loss": 0.4497, + "step": 115 + }, + { + "epoch": 0.6045340050377834, + "grad_norm": 1.8947474956512451, + "learning_rate": 2.821286931599684e-05, + "loss": 0.4026, + "step": 120 + }, + { + "epoch": 0.6297229219143576, + "grad_norm": 1.6779061555862427, + "learning_rate": 2.801110890936867e-05, + "loss": 0.3854, + "step": 125 + }, + { + "epoch": 0.654911838790932, + "grad_norm": 1.7231141328811646, + "learning_rate": 2.7799363224482334e-05, + "loss": 0.3455, + "step": 130 + }, + { + "epoch": 0.6801007556675063, + "grad_norm": 1.8772200345993042, + "learning_rate": 2.7577794763980634e-05, + "loss": 0.3439, + "step": 135 + }, + { + "epoch": 0.7052896725440806, + "grad_norm": 1.8255349397659302, + "learning_rate": 2.734657356892208e-05, + "loss": 0.3177, + "step": 140 + }, + { + "epoch": 0.7304785894206549, + "grad_norm": 1.402854084968567, + "learning_rate": 2.710587708828414e-05, + "loss": 0.2957, + "step": 145 + }, + { + "epoch": 0.7556675062972292, + "grad_norm": 1.6813524961471558, + "learning_rate": 2.685589004278139e-05, + "loss": 0.3197, + "step": 150 + }, + { + "epoch": 0.7808564231738035, + "grad_norm": 1.5237629413604736, + "learning_rate": 2.6596804283102928e-05, + "loss": 0.2561, + "step": 155 + }, + { + "epoch": 0.8060453400503779, + "grad_norm": 1.5846911668777466, + "learning_rate": 2.6328818642678026e-05, + "loss": 0.2371, + "step": 160 + }, + { + "epoch": 0.8312342569269522, + "grad_norm": 1.534378170967102, + "learning_rate": 2.6052138785082897e-05, + "loss": 0.2449, + "step": 165 + }, + { + "epoch": 0.8564231738035264, + "grad_norm": 1.4565974473953247, + "learning_rate": 2.5766977046205735e-05, + "loss": 0.2407, + "step": 170 + }, + { + "epoch": 0.8816120906801007, + "grad_norm": 1.1947420835494995, + "learning_rate": 2.5473552271291092e-05, + "loss": 0.2728, + "step": 175 + }, + { + "epoch": 0.906801007556675, + "grad_norm": 1.5921436548233032, + "learning_rate": 2.5172089646988765e-05, + "loss": 0.2882, + "step": 180 + }, + { + "epoch": 0.9319899244332494, + "grad_norm": 1.6625869274139404, + "learning_rate": 2.4862820528535955e-05, + "loss": 0.2672, + "step": 185 + }, + { + "epoch": 0.9571788413098237, + "grad_norm": 1.5483742952346802, + "learning_rate": 2.4545982262205455e-05, + "loss": 0.2395, + "step": 190 + }, + { + "epoch": 0.982367758186398, + "grad_norm": 1.62028169631958, + "learning_rate": 2.422181800315599e-05, + "loss": 0.2157, + "step": 195 + }, + { + "epoch": 1.0050377833753148, + "grad_norm": 1.294691801071167, + "learning_rate": 2.3890576528824637e-05, + "loss": 0.2204, + "step": 200 + }, + { + "epoch": 1.0302267002518892, + "grad_norm": 1.299323558807373, + "learning_rate": 2.3552512048004428e-05, + "loss": 0.1852, + "step": 205 + }, + { + "epoch": 1.0554156171284634, + "grad_norm": 1.1689867973327637, + "learning_rate": 2.3207884005753707e-05, + "loss": 0.1626, + "step": 210 + }, + { + "epoch": 1.0806045340050379, + "grad_norm": 0.9261149764060974, + "learning_rate": 2.2856956884286986e-05, + "loss": 0.1704, + "step": 215 + }, + { + "epoch": 1.105793450881612, + "grad_norm": 1.167427897453308, + "learning_rate": 2.25e-05, + "loss": 0.1596, + "step": 220 + }, + { + "epoch": 1.1309823677581865, + "grad_norm": 1.1806327104568481, + "learning_rate": 2.213728729678491e-05, + "loss": 0.1974, + "step": 225 + }, + { + "epoch": 1.1561712846347607, + "grad_norm": 0.896888256072998, + "learning_rate": 2.1769097135794052e-05, + "loss": 0.1646, + "step": 230 + }, + { + "epoch": 1.181360201511335, + "grad_norm": 1.2077964544296265, + "learning_rate": 2.139571208181381e-05, + "loss": 0.1953, + "step": 235 + }, + { + "epoch": 1.2065491183879093, + "grad_norm": 1.0712946653366089, + "learning_rate": 2.101741868641233e-05, + "loss": 0.1582, + "step": 240 + }, + { + "epoch": 1.2317380352644836, + "grad_norm": 1.1542234420776367, + "learning_rate": 2.0634507268027702e-05, + "loss": 0.1691, + "step": 245 + }, + { + "epoch": 1.256926952141058, + "grad_norm": 1.0322096347808838, + "learning_rate": 2.0247271689165226e-05, + "loss": 0.173, + "step": 250 + }, + { + "epoch": 1.2821158690176322, + "grad_norm": 1.101555585861206, + "learning_rate": 1.985600913087482e-05, + "loss": 0.1722, + "step": 255 + }, + { + "epoch": 1.3073047858942066, + "grad_norm": 1.0932804346084595, + "learning_rate": 1.946101986468167e-05, + "loss": 0.1592, + "step": 260 + }, + { + "epoch": 1.3324937027707808, + "grad_norm": 1.0992586612701416, + "learning_rate": 1.906260702214508e-05, + "loss": 0.1572, + "step": 265 + }, + { + "epoch": 1.3576826196473553, + "grad_norm": 0.9699830412864685, + "learning_rate": 1.866107636222242e-05, + "loss": 0.1401, + "step": 270 + }, + { + "epoch": 1.3828715365239295, + "grad_norm": 1.070984125137329, + "learning_rate": 1.82567360366167e-05, + "loss": 0.1641, + "step": 275 + }, + { + "epoch": 1.4080604534005037, + "grad_norm": 0.980705738067627, + "learning_rate": 1.7849896353287853e-05, + "loss": 0.1386, + "step": 280 + }, + { + "epoch": 1.433249370277078, + "grad_norm": 0.9951269626617432, + "learning_rate": 1.744086953830922e-05, + "loss": 0.1514, + "step": 285 + }, + { + "epoch": 1.4584382871536523, + "grad_norm": 0.8241779804229736, + "learning_rate": 1.702996949625197e-05, + "loss": 0.1427, + "step": 290 + }, + { + "epoch": 1.4836272040302267, + "grad_norm": 1.3388149738311768, + "learning_rate": 1.6617511569281382e-05, + "loss": 0.1408, + "step": 295 + }, + { + "epoch": 1.508816120906801, + "grad_norm": 1.1211367845535278, + "learning_rate": 1.6203812295149876e-05, + "loss": 0.1467, + "step": 300 + }, + { + "epoch": 1.5340050377833752, + "grad_norm": 1.0249072313308716, + "learning_rate": 1.5789189164272456e-05, + "loss": 0.1489, + "step": 305 + }, + { + "epoch": 1.5591939546599496, + "grad_norm": 0.8930245637893677, + "learning_rate": 1.5373960376071095e-05, + "loss": 0.1331, + "step": 310 + }, + { + "epoch": 1.584382871536524, + "grad_norm": 1.2546464204788208, + "learning_rate": 1.495844459477494e-05, + "loss": 0.1264, + "step": 315 + }, + { + "epoch": 1.6095717884130982, + "grad_norm": 0.9824733138084412, + "learning_rate": 1.4542960704863842e-05, + "loss": 0.1292, + "step": 320 + }, + { + "epoch": 1.6347607052896724, + "grad_norm": 0.9759043455123901, + "learning_rate": 1.4127827566342864e-05, + "loss": 0.1285, + "step": 325 + }, + { + "epoch": 1.6599496221662469, + "grad_norm": 1.0600088834762573, + "learning_rate": 1.371336377003551e-05, + "loss": 0.1309, + "step": 330 + }, + { + "epoch": 1.6851385390428213, + "grad_norm": 1.215764045715332, + "learning_rate": 1.3299887393083629e-05, + "loss": 0.128, + "step": 335 + }, + { + "epoch": 1.7103274559193955, + "grad_norm": 0.7637521624565125, + "learning_rate": 1.288771575484145e-05, + "loss": 0.131, + "step": 340 + }, + { + "epoch": 1.7355163727959697, + "grad_norm": 1.0411086082458496, + "learning_rate": 1.2477165173351256e-05, + "loss": 0.1225, + "step": 345 + }, + { + "epoch": 1.760705289672544, + "grad_norm": 0.7655975818634033, + "learning_rate": 1.206855072258742e-05, + "loss": 0.1223, + "step": 350 + }, + { + "epoch": 1.7858942065491183, + "grad_norm": 0.8303889036178589, + "learning_rate": 1.1662185990655285e-05, + "loss": 0.1245, + "step": 355 + }, + { + "epoch": 1.8110831234256928, + "grad_norm": 0.9692918658256531, + "learning_rate": 1.1258382839130282e-05, + "loss": 0.128, + "step": 360 + }, + { + "epoch": 1.836272040302267, + "grad_norm": 0.6096221804618835, + "learning_rate": 1.0857451163722119e-05, + "loss": 0.1135, + "step": 365 + }, + { + "epoch": 1.8614609571788412, + "grad_norm": 0.8770790100097656, + "learning_rate": 1.0459698656447612e-05, + "loss": 0.1257, + "step": 370 + }, + { + "epoch": 1.8866498740554156, + "grad_norm": 0.6556209921836853, + "learning_rate": 1.0065430569494785e-05, + "loss": 0.1272, + "step": 375 + }, + { + "epoch": 1.91183879093199, + "grad_norm": 0.6120905876159668, + "learning_rate": 9.67494948095931e-06, + "loss": 0.1147, + "step": 380 + }, + { + "epoch": 1.9370277078085643, + "grad_norm": 0.8281330466270447, + "learning_rate": 9.288555062633258e-06, + "loss": 0.1115, + "step": 385 + }, + { + "epoch": 1.9622166246851385, + "grad_norm": 0.9107945561408997, + "learning_rate": 8.906543850024186e-06, + "loss": 0.1176, + "step": 390 + }, + { + "epoch": 1.987405541561713, + "grad_norm": 0.6128442883491516, + "learning_rate": 8.529209014781202e-06, + "loss": 0.1106, + "step": 395 + }, + { + "epoch": 2.0100755667506296, + "grad_norm": 0.5697270631790161, + "learning_rate": 8.156840139702554e-06, + "loss": 0.1082, + "step": 400 + }, + { + "epoch": 2.0352644836272042, + "grad_norm": 0.5294547080993652, + "learning_rate": 7.789722996497514e-06, + "loss": 0.1088, + "step": 405 + }, + { + "epoch": 2.0604534005037785, + "grad_norm": 0.5738343596458435, + "learning_rate": 7.4281393264729584e-06, + "loss": 0.1057, + "step": 410 + }, + { + "epoch": 2.0856423173803527, + "grad_norm": 0.6592573523521423, + "learning_rate": 7.072366624313169e-06, + "loss": 0.11, + "step": 415 + }, + { + "epoch": 2.110831234256927, + "grad_norm": 0.5268198847770691, + "learning_rate": 6.722677925118561e-06, + "loss": 0.1002, + "step": 420 + }, + { + "epoch": 2.136020151133501, + "grad_norm": 0.41333237290382385, + "learning_rate": 6.379341594866983e-06, + "loss": 0.1032, + "step": 425 + }, + { + "epoch": 2.1612090680100757, + "grad_norm": 0.5605461597442627, + "learning_rate": 6.0426211244582105e-06, + "loss": 0.1058, + "step": 430 + }, + { + "epoch": 2.18639798488665, + "grad_norm": 0.4502880275249481, + "learning_rate": 5.712774927499851e-06, + "loss": 0.1089, + "step": 435 + }, + { + "epoch": 2.211586901763224, + "grad_norm": 0.40764886140823364, + "learning_rate": 5.390056141989745e-06, + "loss": 0.111, + "step": 440 + }, + { + "epoch": 2.2367758186397984, + "grad_norm": 0.5187095999717712, + "learning_rate": 5.0747124360471125e-06, + "loss": 0.1042, + "step": 445 + }, + { + "epoch": 2.261964735516373, + "grad_norm": 0.5565797686576843, + "learning_rate": 4.766985817841482e-06, + "loss": 0.1024, + "step": 450 + }, + { + "epoch": 2.287153652392947, + "grad_norm": 0.60408616065979, + "learning_rate": 4.4671124498653624e-06, + "loss": 0.0981, + "step": 455 + }, + { + "epoch": 2.3123425692695214, + "grad_norm": 0.440290629863739, + "learning_rate": 4.175322467693068e-06, + "loss": 0.0982, + "step": 460 + }, + { + "epoch": 2.3375314861460956, + "grad_norm": 0.5031405091285706, + "learning_rate": 3.891839803364934e-06, + "loss": 0.1072, + "step": 465 + }, + { + "epoch": 2.36272040302267, + "grad_norm": 0.5585745573043823, + "learning_rate": 3.6168820135322987e-06, + "loss": 0.1071, + "step": 470 + }, + { + "epoch": 2.3879093198992445, + "grad_norm": 0.5705915093421936, + "learning_rate": 3.3506601124953246e-06, + "loss": 0.0974, + "step": 475 + }, + { + "epoch": 2.4130982367758187, + "grad_norm": 0.575580894947052, + "learning_rate": 3.0933784102616147e-06, + "loss": 0.1001, + "step": 480 + }, + { + "epoch": 2.438287153652393, + "grad_norm": 0.5322062969207764, + "learning_rate": 2.845234355750051e-06, + "loss": 0.1051, + "step": 485 + }, + { + "epoch": 2.463476070528967, + "grad_norm": 0.49697980284690857, + "learning_rate": 2.60641838526008e-06, + "loss": 0.0952, + "step": 490 + }, + { + "epoch": 2.4886649874055413, + "grad_norm": 0.5755271315574646, + "learning_rate": 2.3771137763228014e-06, + "loss": 0.1019, + "step": 495 + }, + { + "epoch": 2.513853904282116, + "grad_norm": 0.45311081409454346, + "learning_rate": 2.1574965070460047e-06, + "loss": 0.0954, + "step": 500 + }, + { + "epoch": 2.53904282115869, + "grad_norm": 0.5209969282150269, + "learning_rate": 1.947735121061088e-06, + "loss": 0.09, + "step": 505 + }, + { + "epoch": 2.5642317380352644, + "grad_norm": 0.540217399597168, + "learning_rate": 1.7479905981754917e-06, + "loss": 0.0964, + "step": 510 + }, + { + "epoch": 2.589420654911839, + "grad_norm": 0.41995298862457275, + "learning_rate": 1.5584162308299675e-06, + "loss": 0.1011, + "step": 515 + }, + { + "epoch": 2.6146095717884132, + "grad_norm": 0.38499268889427185, + "learning_rate": 1.3791575064554262e-06, + "loss": 0.0974, + "step": 520 + }, + { + "epoch": 2.6397984886649875, + "grad_norm": 0.5403231382369995, + "learning_rate": 1.2103519958197084e-06, + "loss": 0.1012, + "step": 525 + }, + { + "epoch": 2.6649874055415617, + "grad_norm": 0.42614272236824036, + "learning_rate": 1.052129247449915e-06, + "loss": 0.0916, + "step": 530 + }, + { + "epoch": 2.690176322418136, + "grad_norm": 0.4892199635505676, + "learning_rate": 9.046106882113753e-07, + "loss": 0.1076, + "step": 535 + }, + { + "epoch": 2.7153652392947105, + "grad_norm": 0.4003947973251343, + "learning_rate": 7.679095301194849e-07, + "loss": 0.0986, + "step": 540 + }, + { + "epoch": 2.7405541561712847, + "grad_norm": 0.4425778388977051, + "learning_rate": 6.421306834560126e-07, + "loss": 0.0949, + "step": 545 + }, + { + "epoch": 2.765743073047859, + "grad_norm": 0.3953980505466461, + "learning_rate": 5.273706762564761e-07, + "loss": 0.0967, + "step": 550 + }, + { + "epoch": 2.790931989924433, + "grad_norm": 0.4812396168708801, + "learning_rate": 4.2371758023042604e-07, + "loss": 0.099, + "step": 555 + }, + { + "epoch": 2.8161209068010074, + "grad_norm": 0.6512130498886108, + "learning_rate": 3.312509431714661e-07, + "loss": 0.0977, + "step": 560 + }, + { + "epoch": 2.841309823677582, + "grad_norm": 0.44170695543289185, + "learning_rate": 2.50041727908909e-07, + "loss": 0.0962, + "step": 565 + }, + { + "epoch": 2.866498740554156, + "grad_norm": 0.4869234561920166, + "learning_rate": 1.8015225784786483e-07, + "loss": 0.0932, + "step": 570 + }, + { + "epoch": 2.8916876574307304, + "grad_norm": 0.5406020879745483, + "learning_rate": 1.2163616913962395e-07, + "loss": 0.0964, + "step": 575 + }, + { + "epoch": 2.9168765743073046, + "grad_norm": 0.4305759370326996, + "learning_rate": 7.453836951897885e-08, + "loss": 0.1023, + "step": 580 + }, + { + "epoch": 2.942065491183879, + "grad_norm": 0.420620858669281, + "learning_rate": 3.889500384013755e-08, + "loss": 0.1, + "step": 585 + }, + { + "epoch": 2.9672544080604535, + "grad_norm": 0.4102805256843567, + "learning_rate": 1.4733426337610877e-08, + "loss": 0.0951, + "step": 590 + }, + { + "epoch": 2.9924433249370277, + "grad_norm": 0.40813589096069336, + "learning_rate": 2.0721796334149945e-09, + "loss": 0.088, + "step": 595 + }, + { + "epoch": 3.0, + "step": 597, + "total_flos": 1.760462524949463e+17, + "train_loss": 0.3074202121951472, + "train_runtime": 398.1856, + "train_samples_per_second": 47.827, + "train_steps_per_second": 1.499 + } + ], + "logging_steps": 5, + "max_steps": 597, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.760462524949463e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..aea69648b302a617afca480d031dbc81cb4d9672 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/6_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677355f93b880dc8b0018983a0996068ca8c39ed46516196d6ce342c4b266c29 +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..aef131afd515c26c3b36623a1e086e10a3e2bc8f --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 7_128_e3_3e-5 + results: [] +--- + + + +# 7_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..bfcff170f777b43bc2ad77bdeb17ea12dab58e85 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "o_proj", + "up_proj", + "down_proj", + "k_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b765e4759436ee40835ac403ed090fde9892a48 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fbac639fb69e9795cb17d535f17caecadf1277360f6ac5370278e4cb193762e +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c359b152381a24b3903ab9bb82d28b595b06afb4 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.682776023545938e+17, + "train_loss": 0.31394977576604194, + "train_runtime": 397.2915, + "train_samples": 6319, + "train_samples_per_second": 47.716, + "train_steps_per_second": 1.495 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c359b152381a24b3903ab9bb82d28b595b06afb4 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.682776023545938e+17, + "train_loss": 0.31394977576604194, + "train_runtime": 397.2915, + "train_samples": 6319, + "train_samples_per_second": 47.716, + "train_steps_per_second": 1.495 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5cbe357ebe98ae0c4ed50f83953055da01bec097 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.7558250427246094, + "learning_rate": 4e-06, + "loss": 1.7657, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.5437127351760864, + "learning_rate": 9e-06, + "loss": 1.7587, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.32441246509552, + "learning_rate": 1.4e-05, + "loss": 1.6032, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.5569583177566528, + "learning_rate": 1.9e-05, + "loss": 1.5551, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.4382705688476562, + "learning_rate": 2.4e-05, + "loss": 1.4879, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.3789088726043701, + "learning_rate": 2.9e-05, + "loss": 1.3209, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.3749150037765503, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.279, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.3754522800445557, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1726, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.5649147033691406, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1221, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.353642225265503, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0366, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.6599448919296265, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.9506, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.7037793397903442, + "learning_rate": 2.980472159255521e-05, + "loss": 0.9074, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.5244883298873901, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8517, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.81416654586792, + "learning_rate": 2.964744782530777e-05, + "loss": 0.8019, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.5552901029586792, + "learning_rate": 2.955173677376284e-05, + "loss": 0.6974, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.9680477380752563, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.7178, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 2.0363821983337402, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.6082, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.7685606479644775, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5632, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.8281863927841187, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.5291, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.674863576889038, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5153, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.6949875354766846, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4648, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.9897258281707764, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4615, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.5540549755096436, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3935, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 2.401559352874756, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3773, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.7393665313720703, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.4105, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.7087918519973755, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3514, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.3453165292739868, + "learning_rate": 2.7552684578024e-05, + "loss": 0.365, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.7580972909927368, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3211, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 2.0163815021514893, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.3164, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.6415690183639526, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3533, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.9556576013565063, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.287, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.9556844234466553, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2857, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.7899670600891113, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2807, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.8158448934555054, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2624, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.6012107133865356, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2809, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.5088467597961426, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2431, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.4667609930038452, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2003, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.5927577018737793, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2288, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.5995004177093506, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2269, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.3875617980957031, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1978, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.532578706741333, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1874, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.387101411819458, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1724, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.401574730873108, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1825, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.152778148651123, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1874, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 0.89920574426651, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1715, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.507157564163208, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1772, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.2810171842575073, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1732, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.084065556526184, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1718, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.069628119468689, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1678, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.185738444328308, + "learning_rate": 2.015646466956045e-05, + "loss": 0.167, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 0.8281831741333008, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1787, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.9780508875846863, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1485, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 1.043724536895752, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1517, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.1145492792129517, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1727, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 1.0649261474609375, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1665, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.8617432117462158, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1494, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.99837327003479, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1606, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 1.1681628227233887, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1352, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.7786287069320679, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1348, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.2219550609588623, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1454, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.922601044178009, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1308, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.9506301283836365, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1331, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.7716006636619568, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1262, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 1.0607738494873047, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1472, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.6287097930908203, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1294, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.8589150905609131, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1225, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.9823206663131714, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1361, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.8830229640007019, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1205, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.8879204988479614, + "learning_rate": 1.234043966149462e-05, + "loss": 0.128, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.6110222935676575, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1254, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.7705934047698975, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1163, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.5971097350120544, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1248, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.7029211521148682, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1218, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.5868043303489685, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1204, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.5708044767379761, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1138, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.6876425743103027, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1162, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.6197518706321716, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1314, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.6735990047454834, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1094, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.7757262587547302, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1124, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5778895020484924, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1128, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.5317203402519226, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1034, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.47459837794303894, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1055, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.5348770618438721, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1071, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.5305350422859192, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1083, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.4495435059070587, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1037, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.6187941431999207, + "learning_rate": 5.901757461493989e-06, + "loss": 0.0986, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.5800873041152954, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1097, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.5237676501274109, + "learning_rate": 5.25192736699541e-06, + "loss": 0.108, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.47025924921035767, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1042, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.6099388003349304, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1013, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.4531245827674866, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1047, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.5018891096115112, + "learning_rate": 4.044902040769963e-06, + "loss": 0.1088, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.6921392679214478, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.1139, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.5534802079200745, + "learning_rate": 3.491450893410134e-06, + "loss": 0.0985, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.5301963090896606, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.0998, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.4980686604976654, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.1055, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.5189515948295593, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1018, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.5097495317459106, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.0961, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.34911802411079407, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1001, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.463482141494751, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1073, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.46737411618232727, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1018, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.4543929994106293, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.1067, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.4744262993335724, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0977, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.4806784689426422, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.1043, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.4499480724334717, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0961, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.46673256158828735, + "learning_rate": 9.724785471955566e-07, + "loss": 0.097, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.4211287796497345, + "learning_rate": 8.299636258812199e-07, + "loss": 0.1007, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.35493430495262146, + "learning_rate": 6.984393814019885e-07, + "loss": 0.095, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.44448724389076233, + "learning_rate": 5.780078276432865e-07, + "loss": 0.0964, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.39334332942962646, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.1041, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.47510451078414917, + "learning_rate": 3.707877563706158e-07, + "loss": 0.1002, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.5315325856208801, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.1063, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.45923057198524475, + "learning_rate": 2.089461901495715e-07, + "loss": 0.0982, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.40152037143707275, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0993, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.4501343071460724, + "learning_rate": 9.298514701147898e-08, + "loss": 0.0984, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.36643949151039124, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0939, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.442359060049057, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.104, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.43114665150642395, + "learning_rate": 5.817209927129752e-09, + "loss": 0.1, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.682776023545938e+17, + "train_loss": 0.31394977576604194, + "train_runtime": 397.2915, + "train_samples_per_second": 47.716, + "train_steps_per_second": 1.495 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.682776023545938e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebbb25e19407dff247a6f88de844d101f94db38f --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/7_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1813d708f3813cc74634322e4a8799a5a9c90d5a697e5b69725f6f40fba3aaf5 +size 8273 diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d2909ce8621d3c502d9c7874fa98c05e56153e3b --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 8_128_e3_3e-5 + results: [] +--- + + + +# 8_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 1 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 4 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..31d22e27c205aec48c5a2066f59950a19aec791d --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "down_proj", + "o_proj", + "v_proj", + "up_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c675ce06295659cc707aee46471af04cbef2ebf --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7140e24f32a70ba0b89082468dc5246f0b92ed8db431de4632686d0fddaa7a1a +size 671150064 diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..76e2373e964e937029182d13d9754f5ee2c9c8b3 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 2.983220060618752e+16, + "train_loss": 0.4447429109645146, + "train_runtime": 65.543, + "train_samples": 972, + "train_samples_per_second": 44.49, + "train_steps_per_second": 1.419 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1acb90225264091ebb8e25baed401fabe20462e --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/config.json @@ -0,0 +1,35 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..44171e7cbf5a42aeae98c6a15c71ffc767c40786 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba5b226e47e239920819d716ef04b706597802a9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..76e2373e964e937029182d13d9754f5ee2c9c8b3 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 2.983220060618752e+16, + "train_loss": 0.4447429109645146, + "train_runtime": 65.543, + "train_samples": 972, + "train_samples_per_second": 44.49, + "train_steps_per_second": 1.419 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ca01b160377de41c51cf6a95e1518d0ed6caa0d1 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/trainer_state.json @@ -0,0 +1,169 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 93, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.16393442622950818, + "grad_norm": 1.1792876720428467, + "learning_rate": 2.4e-05, + "loss": 1.6988, + "step": 5 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 1.4506335258483887, + "learning_rate": 2.984732162821399e-05, + "loss": 1.4223, + "step": 10 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 1.5559769868850708, + "learning_rate": 2.9232388752559797e-05, + "loss": 1.0265, + "step": 15 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 1.244294285774231, + "learning_rate": 2.8165184843508835e-05, + "loss": 0.7429, + "step": 20 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 1.3366678953170776, + "learning_rate": 2.6679623070746327e-05, + "loss": 0.6317, + "step": 25 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 1.5383589267730713, + "learning_rate": 2.482291100917928e-05, + "loss": 0.4345, + "step": 30 + }, + { + "epoch": 1.1311475409836065, + "grad_norm": 1.3185609579086304, + "learning_rate": 2.2654050495913498e-05, + "loss": 0.4011, + "step": 35 + }, + { + "epoch": 1.2950819672131146, + "grad_norm": 1.1856027841567993, + "learning_rate": 2.0241962693986477e-05, + "loss": 0.2904, + "step": 40 + }, + { + "epoch": 1.459016393442623, + "grad_norm": 1.3450500965118408, + "learning_rate": 1.7663297943814555e-05, + "loss": 0.2477, + "step": 45 + }, + { + "epoch": 1.6229508196721312, + "grad_norm": 0.9556318521499634, + "learning_rate": 1.5e-05, + "loss": 0.2247, + "step": 50 + }, + { + "epoch": 1.7868852459016393, + "grad_norm": 1.1485364437103271, + "learning_rate": 1.2336702056185454e-05, + "loss": 0.1846, + "step": 55 + }, + { + "epoch": 1.9508196721311475, + "grad_norm": 0.9281669855117798, + "learning_rate": 9.758037306013527e-06, + "loss": 0.1688, + "step": 60 + }, + { + "epoch": 2.098360655737705, + "grad_norm": 0.8051573038101196, + "learning_rate": 7.345949504086509e-06, + "loss": 0.1384, + "step": 65 + }, + { + "epoch": 2.262295081967213, + "grad_norm": 0.7551215291023254, + "learning_rate": 5.177088990820725e-06, + "loss": 0.1365, + "step": 70 + }, + { + "epoch": 2.4262295081967213, + "grad_norm": 0.8760083913803101, + "learning_rate": 3.3203769292536767e-06, + "loss": 0.117, + "step": 75 + }, + { + "epoch": 2.5901639344262293, + "grad_norm": 0.73171067237854, + "learning_rate": 1.8348151564911653e-06, + "loss": 0.109, + "step": 80 + }, + { + "epoch": 2.7540983606557377, + "grad_norm": 0.8879324197769165, + "learning_rate": 7.676112474402069e-07, + "loss": 0.1142, + "step": 85 + }, + { + "epoch": 2.918032786885246, + "grad_norm": 0.7704339623451233, + "learning_rate": 1.5267837178600974e-07, + "loss": 0.1196, + "step": 90 + }, + { + "epoch": 3.0, + "step": 93, + "total_flos": 2.983220060618752e+16, + "train_loss": 0.4447429109645146, + "train_runtime": 65.543, + "train_samples_per_second": 44.49, + "train_steps_per_second": 1.419 + } + ], + "logging_steps": 5, + "max_steps": 93, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.983220060618752e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d8217ab408f3dce2cb1356eacec78a237ffef313 --- /dev/null +++ b/barexam_qa_train_knowledge_100_base/8_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32111857bc29064bc50e302687639f98e8718a4bd7e03dd7d8781d650e5f2dce +size 8273 diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d1d7b96e731943e92f6ca5812b253c75f28916ac --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 0_128_e3_3e-5 + results: [] +--- + + + +# 0_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dc5492ca3b41fa9ce2ea4e7a29c79ca89063296d --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "gate_proj", + "down_proj", + "up_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24dfd8d03893f482e20a1684ac44454715d99668 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a4f18e539e2e75c57e21c87966c0d915768b0b3324f1d1f1d95d9a934461c7 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8bd850db8ac1e139a2affe33d1ca5db70799217e --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.907839192876974e+17, + "train_loss": 0.30943006307340515, + "train_runtime": 399.2699, + "train_samples": 6285, + "train_samples_per_second": 47.224, + "train_steps_per_second": 1.48 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8bd850db8ac1e139a2affe33d1ca5db70799217e --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.907839192876974e+17, + "train_loss": 0.30943006307340515, + "train_runtime": 399.2699, + "train_samples": 6285, + "train_samples_per_second": 47.224, + "train_steps_per_second": 1.48 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..39129c08603f3c32c6233a1b9e617560e0f5087f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 591, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02544529262086514, + "grad_norm": 2.185513496398926, + "learning_rate": 4e-06, + "loss": 1.8375, + "step": 5 + }, + { + "epoch": 0.05089058524173028, + "grad_norm": 1.6614359617233276, + "learning_rate": 9e-06, + "loss": 1.7477, + "step": 10 + }, + { + "epoch": 0.07633587786259542, + "grad_norm": 1.2822216749191284, + "learning_rate": 1.4e-05, + "loss": 1.6694, + "step": 15 + }, + { + "epoch": 0.10178117048346055, + "grad_norm": 1.161952018737793, + "learning_rate": 1.9e-05, + "loss": 1.585, + "step": 20 + }, + { + "epoch": 0.1272264631043257, + "grad_norm": 1.1386936902999878, + "learning_rate": 2.4e-05, + "loss": 1.4585, + "step": 25 + }, + { + "epoch": 0.15267175572519084, + "grad_norm": 1.4390007257461548, + "learning_rate": 2.9e-05, + "loss": 1.3924, + "step": 30 + }, + { + "epoch": 0.178117048346056, + "grad_norm": 1.7908406257629395, + "learning_rate": 2.9996236974947768e-05, + "loss": 1.2405, + "step": 35 + }, + { + "epoch": 0.2035623409669211, + "grad_norm": 1.3969963788986206, + "learning_rate": 2.998095292147853e-05, + "loss": 1.256, + "step": 40 + }, + { + "epoch": 0.22900763358778625, + "grad_norm": 1.463012933731079, + "learning_rate": 2.9953924623165958e-05, + "loss": 1.1223, + "step": 45 + }, + { + "epoch": 0.2544529262086514, + "grad_norm": 1.2692265510559082, + "learning_rate": 2.9915173268712462e-05, + "loss": 1.1111, + "step": 50 + }, + { + "epoch": 0.27989821882951654, + "grad_norm": 1.4686758518218994, + "learning_rate": 2.9864729237053014e-05, + "loss": 0.9964, + "step": 55 + }, + { + "epoch": 0.3053435114503817, + "grad_norm": 1.5869241952896118, + "learning_rate": 2.9802632073539752e-05, + "loss": 0.894, + "step": 60 + }, + { + "epoch": 0.33078880407124683, + "grad_norm": 1.5675218105316162, + "learning_rate": 2.97289304589406e-05, + "loss": 0.8762, + "step": 65 + }, + { + "epoch": 0.356234096692112, + "grad_norm": 1.5942552089691162, + "learning_rate": 2.9643682171276208e-05, + "loss": 0.8124, + "step": 70 + }, + { + "epoch": 0.3816793893129771, + "grad_norm": 1.5860236883163452, + "learning_rate": 2.9546954040525144e-05, + "loss": 0.7387, + "step": 75 + }, + { + "epoch": 0.4071246819338422, + "grad_norm": 1.6817950010299683, + "learning_rate": 2.9438821896232884e-05, + "loss": 0.6942, + "step": 80 + }, + { + "epoch": 0.43256997455470736, + "grad_norm": 1.820928692817688, + "learning_rate": 2.93193705080656e-05, + "loss": 0.6341, + "step": 85 + }, + { + "epoch": 0.4580152671755725, + "grad_norm": 1.7973545789718628, + "learning_rate": 2.9188693519355373e-05, + "loss": 0.5804, + "step": 90 + }, + { + "epoch": 0.48346055979643765, + "grad_norm": 1.5497148036956787, + "learning_rate": 2.9046893373689004e-05, + "loss": 0.5337, + "step": 95 + }, + { + "epoch": 0.5089058524173028, + "grad_norm": 2.188654899597168, + "learning_rate": 2.8894081234597826e-05, + "loss": 0.4985, + "step": 100 + }, + { + "epoch": 0.5343511450381679, + "grad_norm": 1.7541404962539673, + "learning_rate": 2.873037689841161e-05, + "loss": 0.4617, + "step": 105 + }, + { + "epoch": 0.5597964376590331, + "grad_norm": 1.815367341041565, + "learning_rate": 2.8555908700344826e-05, + "loss": 0.4114, + "step": 110 + }, + { + "epoch": 0.5852417302798982, + "grad_norm": 2.130046844482422, + "learning_rate": 2.837081341388887e-05, + "loss": 0.3757, + "step": 115 + }, + { + "epoch": 0.6106870229007634, + "grad_norm": 1.9897003173828125, + "learning_rate": 2.8175236143589143e-05, + "loss": 0.4133, + "step": 120 + }, + { + "epoch": 0.6361323155216285, + "grad_norm": 1.7446224689483643, + "learning_rate": 2.7969330211291083e-05, + "loss": 0.3531, + "step": 125 + }, + { + "epoch": 0.6615776081424937, + "grad_norm": 1.8930778503417969, + "learning_rate": 2.7753257035944216e-05, + "loss": 0.3763, + "step": 130 + }, + { + "epoch": 0.6870229007633588, + "grad_norm": 1.5313451290130615, + "learning_rate": 2.7527186007058584e-05, + "loss": 0.3372, + "step": 135 + }, + { + "epoch": 0.712468193384224, + "grad_norm": 1.4236750602722168, + "learning_rate": 2.729129435191267e-05, + "loss": 0.3155, + "step": 140 + }, + { + "epoch": 0.7379134860050891, + "grad_norm": 1.6176203489303589, + "learning_rate": 2.7045766996616914e-05, + "loss": 0.3087, + "step": 145 + }, + { + "epoch": 0.7633587786259542, + "grad_norm": 1.7124353647232056, + "learning_rate": 2.6790796421141813e-05, + "loss": 0.2856, + "step": 150 + }, + { + "epoch": 0.7888040712468194, + "grad_norm": 1.5271896123886108, + "learning_rate": 2.652658250842418e-05, + "loss": 0.2447, + "step": 155 + }, + { + "epoch": 0.8142493638676844, + "grad_norm": 1.5979710817337036, + "learning_rate": 2.6253332387669896e-05, + "loss": 0.2634, + "step": 160 + }, + { + "epoch": 0.8396946564885496, + "grad_norm": 1.5221363306045532, + "learning_rate": 2.597126027197598e-05, + "loss": 0.2626, + "step": 165 + }, + { + "epoch": 0.8651399491094147, + "grad_norm": 1.371504783630371, + "learning_rate": 2.5680587290399283e-05, + "loss": 0.2288, + "step": 170 + }, + { + "epoch": 0.8905852417302799, + "grad_norm": 1.4546705484390259, + "learning_rate": 2.5381541314603425e-05, + "loss": 0.237, + "step": 175 + }, + { + "epoch": 0.916030534351145, + "grad_norm": 1.35300874710083, + "learning_rate": 2.5074356780219952e-05, + "loss": 0.2112, + "step": 180 + }, + { + "epoch": 0.9414758269720102, + "grad_norm": 1.0715548992156982, + "learning_rate": 2.4759274503063632e-05, + "loss": 0.2214, + "step": 185 + }, + { + "epoch": 0.9669211195928753, + "grad_norm": 1.4081146717071533, + "learning_rate": 2.44365414903461e-05, + "loss": 0.2146, + "step": 190 + }, + { + "epoch": 0.9923664122137404, + "grad_norm": 1.2087507247924805, + "learning_rate": 2.410641074703575e-05, + "loss": 0.237, + "step": 195 + }, + { + "epoch": 1.015267175572519, + "grad_norm": 1.2860759496688843, + "learning_rate": 2.3769141077515717e-05, + "loss": 0.1793, + "step": 200 + }, + { + "epoch": 1.0407124681933841, + "grad_norm": 0.9982023239135742, + "learning_rate": 2.342499688269547e-05, + "loss": 0.1714, + "step": 205 + }, + { + "epoch": 1.0661577608142494, + "grad_norm": 1.2555969953536987, + "learning_rate": 2.3074247952734994e-05, + "loss": 0.1731, + "step": 210 + }, + { + "epoch": 1.0916030534351144, + "grad_norm": 1.1304209232330322, + "learning_rate": 2.271716925554411e-05, + "loss": 0.1634, + "step": 215 + }, + { + "epoch": 1.1170483460559797, + "grad_norm": 0.982666552066803, + "learning_rate": 2.2354040721222733e-05, + "loss": 0.152, + "step": 220 + }, + { + "epoch": 1.1424936386768447, + "grad_norm": 1.0110431909561157, + "learning_rate": 2.198514702261104e-05, + "loss": 0.1693, + "step": 225 + }, + { + "epoch": 1.16793893129771, + "grad_norm": 1.0067222118377686, + "learning_rate": 2.1610777352121578e-05, + "loss": 0.1605, + "step": 230 + }, + { + "epoch": 1.193384223918575, + "grad_norm": 1.0095269680023193, + "learning_rate": 2.12312251950283e-05, + "loss": 0.1504, + "step": 235 + }, + { + "epoch": 1.2188295165394403, + "grad_norm": 1.2519747018814087, + "learning_rate": 2.084678809939019e-05, + "loss": 0.1537, + "step": 240 + }, + { + "epoch": 1.2442748091603053, + "grad_norm": 0.9259910583496094, + "learning_rate": 2.0457767442789962e-05, + "loss": 0.1583, + "step": 245 + }, + { + "epoch": 1.2697201017811706, + "grad_norm": 1.132098913192749, + "learning_rate": 2.0064468196070533e-05, + "loss": 0.1491, + "step": 250 + }, + { + "epoch": 1.2951653944020356, + "grad_norm": 1.2423208951950073, + "learning_rate": 1.9667198684254643e-05, + "loss": 0.1603, + "step": 255 + }, + { + "epoch": 1.3206106870229006, + "grad_norm": 1.1261388063430786, + "learning_rate": 1.9266270344834946e-05, + "loss": 0.1707, + "step": 260 + }, + { + "epoch": 1.3460559796437659, + "grad_norm": 0.8643352389335632, + "learning_rate": 1.8861997483624136e-05, + "loss": 0.1412, + "step": 265 + }, + { + "epoch": 1.3715012722646311, + "grad_norm": 1.2666823863983154, + "learning_rate": 1.8454697028356413e-05, + "loss": 0.1492, + "step": 270 + }, + { + "epoch": 1.3969465648854962, + "grad_norm": 1.0581042766571045, + "learning_rate": 1.8044688280233543e-05, + "loss": 0.1324, + "step": 275 + }, + { + "epoch": 1.4223918575063612, + "grad_norm": 0.7991112470626831, + "learning_rate": 1.7632292663610245e-05, + "loss": 0.1353, + "step": 280 + }, + { + "epoch": 1.4478371501272265, + "grad_norm": 0.7823576331138611, + "learning_rate": 1.721783347401513e-05, + "loss": 0.136, + "step": 285 + }, + { + "epoch": 1.4732824427480917, + "grad_norm": 0.8686821460723877, + "learning_rate": 1.6801635624704777e-05, + "loss": 0.1325, + "step": 290 + }, + { + "epoch": 1.4987277353689568, + "grad_norm": 1.1668267250061035, + "learning_rate": 1.638402539194953e-05, + "loss": 0.1449, + "step": 295 + }, + { + "epoch": 1.5241730279898218, + "grad_norm": 0.8599678874015808, + "learning_rate": 1.5965330159250847e-05, + "loss": 0.1344, + "step": 300 + }, + { + "epoch": 1.549618320610687, + "grad_norm": 0.951873242855072, + "learning_rate": 1.5545878160690586e-05, + "loss": 0.1293, + "step": 305 + }, + { + "epoch": 1.5750636132315523, + "grad_norm": 0.7677724957466125, + "learning_rate": 1.5125998223613501e-05, + "loss": 0.1268, + "step": 310 + }, + { + "epoch": 1.6005089058524173, + "grad_norm": 0.9568209052085876, + "learning_rate": 1.4706019510844666e-05, + "loss": 0.12, + "step": 315 + }, + { + "epoch": 1.6259541984732824, + "grad_norm": 0.7867422699928284, + "learning_rate": 1.4286271262643866e-05, + "loss": 0.1187, + "step": 320 + }, + { + "epoch": 1.6513994910941476, + "grad_norm": 0.634045422077179, + "learning_rate": 1.3867082538599317e-05, + "loss": 0.112, + "step": 325 + }, + { + "epoch": 1.6768447837150129, + "grad_norm": 0.989046573638916, + "learning_rate": 1.3448781959663005e-05, + "loss": 0.1214, + "step": 330 + }, + { + "epoch": 1.7022900763358777, + "grad_norm": 0.6529427170753479, + "learning_rate": 1.3031697450529904e-05, + "loss": 0.1254, + "step": 335 + }, + { + "epoch": 1.727735368956743, + "grad_norm": 0.6848628520965576, + "learning_rate": 1.2616155982563004e-05, + "loss": 0.117, + "step": 340 + }, + { + "epoch": 1.7531806615776082, + "grad_norm": 0.8497040271759033, + "learning_rate": 1.2202483317465706e-05, + "loss": 0.1212, + "step": 345 + }, + { + "epoch": 1.7786259541984732, + "grad_norm": 0.9917230010032654, + "learning_rate": 1.1791003751902542e-05, + "loss": 0.1198, + "step": 350 + }, + { + "epoch": 1.8040712468193383, + "grad_norm": 0.8941617608070374, + "learning_rate": 1.1382039863268376e-05, + "loss": 0.1147, + "step": 355 + }, + { + "epoch": 1.8295165394402035, + "grad_norm": 0.6777728796005249, + "learning_rate": 1.0975912256805437e-05, + "loss": 0.1124, + "step": 360 + }, + { + "epoch": 1.8549618320610688, + "grad_norm": 0.7454141974449158, + "learning_rate": 1.0572939314266403e-05, + "loss": 0.114, + "step": 365 + }, + { + "epoch": 1.8804071246819338, + "grad_norm": 0.7609145045280457, + "learning_rate": 1.0173436944320583e-05, + "loss": 0.117, + "step": 370 + }, + { + "epoch": 1.9058524173027989, + "grad_norm": 0.5622754096984863, + "learning_rate": 9.777718334898859e-06, + "loss": 0.1078, + "step": 375 + }, + { + "epoch": 1.9312977099236641, + "grad_norm": 0.6229256987571716, + "learning_rate": 9.386093707671545e-06, + "loss": 0.1178, + "step": 380 + }, + { + "epoch": 1.9567430025445294, + "grad_norm": 0.5940735936164856, + "learning_rate": 8.998870074851604e-06, + "loss": 0.1098, + "step": 385 + }, + { + "epoch": 1.9821882951653944, + "grad_norm": 0.6833120584487915, + "learning_rate": 8.61635099851395e-06, + "loss": 0.1043, + "step": 390 + }, + { + "epoch": 2.005089058524173, + "grad_norm": 0.3818974494934082, + "learning_rate": 8.238836352619426e-06, + "loss": 0.1121, + "step": 395 + }, + { + "epoch": 2.030534351145038, + "grad_norm": 0.6358445882797241, + "learning_rate": 7.866622087930076e-06, + "loss": 0.1008, + "step": 400 + }, + { + "epoch": 2.0559796437659035, + "grad_norm": 0.45088618993759155, + "learning_rate": 7.500000000000004e-06, + "loss": 0.0996, + "step": 405 + }, + { + "epoch": 2.0814249363867683, + "grad_norm": 0.6999068856239319, + "learning_rate": 7.1392575004236655e-06, + "loss": 0.1051, + "step": 410 + }, + { + "epoch": 2.1068702290076335, + "grad_norm": 0.5026888251304626, + "learning_rate": 6.7846773915209535e-06, + "loss": 0.1024, + "step": 415 + }, + { + "epoch": 2.132315521628499, + "grad_norm": 0.7465732097625732, + "learning_rate": 6.436537644635706e-06, + "loss": 0.0993, + "step": 420 + }, + { + "epoch": 2.157760814249364, + "grad_norm": 0.38138750195503235, + "learning_rate": 6.0951111822214225e-06, + "loss": 0.1006, + "step": 425 + }, + { + "epoch": 2.183206106870229, + "grad_norm": 0.4187212288379669, + "learning_rate": 5.760665663885047e-06, + "loss": 0.1035, + "step": 430 + }, + { + "epoch": 2.208651399491094, + "grad_norm": 0.4972746968269348, + "learning_rate": 5.43346327655652e-06, + "loss": 0.1008, + "step": 435 + }, + { + "epoch": 2.2340966921119594, + "grad_norm": 0.5020768046379089, + "learning_rate": 5.113760528948623e-06, + "loss": 0.097, + "step": 440 + }, + { + "epoch": 2.2595419847328246, + "grad_norm": 0.5039191842079163, + "learning_rate": 4.80180805046822e-06, + "loss": 0.0972, + "step": 445 + }, + { + "epoch": 2.2849872773536894, + "grad_norm": 0.3754376769065857, + "learning_rate": 4.497850394736564e-06, + "loss": 0.0972, + "step": 450 + }, + { + "epoch": 2.3104325699745547, + "grad_norm": 0.5178637504577637, + "learning_rate": 4.202125847872678e-06, + "loss": 0.0971, + "step": 455 + }, + { + "epoch": 2.33587786259542, + "grad_norm": 0.6803424954414368, + "learning_rate": 3.914866241690115e-06, + "loss": 0.1092, + "step": 460 + }, + { + "epoch": 2.3613231552162848, + "grad_norm": 0.560177743434906, + "learning_rate": 3.6362967719535444e-06, + "loss": 0.0982, + "step": 465 + }, + { + "epoch": 2.38676844783715, + "grad_norm": 0.40049490332603455, + "learning_rate": 3.3666358218376274e-06, + "loss": 0.0893, + "step": 470 + }, + { + "epoch": 2.4122137404580153, + "grad_norm": 0.5520178079605103, + "learning_rate": 3.106094790726594e-06, + "loss": 0.0929, + "step": 475 + }, + { + "epoch": 2.4376590330788805, + "grad_norm": 0.4715534746646881, + "learning_rate": 2.8548779284887443e-06, + "loss": 0.0997, + "step": 480 + }, + { + "epoch": 2.4631043256997454, + "grad_norm": 0.4045238494873047, + "learning_rate": 2.6131821753557395e-06, + "loss": 0.096, + "step": 485 + }, + { + "epoch": 2.4885496183206106, + "grad_norm": 0.4760097861289978, + "learning_rate": 2.38119700753228e-06, + "loss": 0.0934, + "step": 490 + }, + { + "epoch": 2.513994910941476, + "grad_norm": 0.3995264172554016, + "learning_rate": 2.159104288657164e-06, + "loss": 0.0897, + "step": 495 + }, + { + "epoch": 2.539440203562341, + "grad_norm": 0.47208571434020996, + "learning_rate": 1.947078127232169e-06, + "loss": 0.0976, + "step": 500 + }, + { + "epoch": 2.564885496183206, + "grad_norm": 0.5406742691993713, + "learning_rate": 1.74528474013055e-06, + "loss": 0.0902, + "step": 505 + }, + { + "epoch": 2.590330788804071, + "grad_norm": 0.4943133592605591, + "learning_rate": 1.5538823222921288e-06, + "loss": 0.0953, + "step": 510 + }, + { + "epoch": 2.6157760814249365, + "grad_norm": 0.5011233687400818, + "learning_rate": 1.3730209227071439e-06, + "loss": 0.0926, + "step": 515 + }, + { + "epoch": 2.6412213740458013, + "grad_norm": 0.3400381803512573, + "learning_rate": 1.2028423267860806e-06, + "loss": 0.0894, + "step": 520 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.4541306793689728, + "learning_rate": 1.0434799452076915e-06, + "loss": 0.084, + "step": 525 + }, + { + "epoch": 2.6921119592875318, + "grad_norm": 0.4862406849861145, + "learning_rate": 8.950587093323437e-07, + "loss": 0.0824, + "step": 530 + }, + { + "epoch": 2.717557251908397, + "grad_norm": 0.42118147015571594, + "learning_rate": 7.576949732626881e-07, + "loss": 0.0917, + "step": 535 + }, + { + "epoch": 2.7430025445292623, + "grad_norm": 0.4657740890979767, + "learning_rate": 6.314964226284381e-07, + "loss": 0.0864, + "step": 540 + }, + { + "epoch": 2.768447837150127, + "grad_norm": 0.394163578748703, + "learning_rate": 5.165619901667313e-07, + "loss": 0.0927, + "step": 545 + }, + { + "epoch": 2.7938931297709924, + "grad_norm": 0.42008286714553833, + "learning_rate": 4.1298177816430917e-07, + "loss": 0.0895, + "step": 550 + }, + { + "epoch": 2.8193384223918576, + "grad_norm": 0.4386577010154724, + "learning_rate": 3.2083698782226e-07, + "loss": 0.0885, + "step": 555 + }, + { + "epoch": 2.8447837150127224, + "grad_norm": 0.43868088722229004, + "learning_rate": 2.401998555987389e-07, + "loss": 0.0904, + "step": 560 + }, + { + "epoch": 2.8702290076335877, + "grad_norm": 0.37441524863243103, + "learning_rate": 1.7113359657954354e-07, + "loss": 0.0846, + "step": 565 + }, + { + "epoch": 2.895674300254453, + "grad_norm": 0.4274452328681946, + "learning_rate": 1.1369235492096397e-07, + "loss": 0.0833, + "step": 570 + }, + { + "epoch": 2.921119592875318, + "grad_norm": 0.41157642006874084, + "learning_rate": 6.792116140373117e-08, + "loss": 0.092, + "step": 575 + }, + { + "epoch": 2.9465648854961835, + "grad_norm": 0.43704795837402344, + "learning_rate": 3.385589813135692e-08, + "loss": 0.0954, + "step": 580 + }, + { + "epoch": 2.9720101781170483, + "grad_norm": 0.6181573867797852, + "learning_rate": 1.1523270400535246e-08, + "loss": 0.0863, + "step": 585 + }, + { + "epoch": 2.9974554707379135, + "grad_norm": 0.40105581283569336, + "learning_rate": 9.407857656540398e-10, + "loss": 0.0903, + "step": 590 + }, + { + "epoch": 3.0, + "step": 591, + "total_flos": 1.907839192876974e+17, + "train_loss": 0.30943006307340515, + "train_runtime": 399.2699, + "train_samples_per_second": 47.224, + "train_steps_per_second": 1.48 + } + ], + "logging_steps": 5, + "max_steps": 591, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.907839192876974e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..641e9e54dc407650930ef9df5462a5bed7351661 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/0_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8ea94eff00d345c74fde3521fb65a3a78ccef0ab492eb042aa29ccf03924457 +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ec0d4f19377397f64d4b53c38d4e8b85c6060f69 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 1_128_e3_3e-5 + results: [] +--- + + + +# 1_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ceefccd4423b9eaed6ca4b29097d9994dfd7685a --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "q_proj", + "down_proj", + "up_proj", + "o_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..212bf8e99bb883535843ed110f944bb28c84cb14 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2998dfef631d3f77c59ec8a7417199f93f14feaed258f7630c7aeb2e51f522a +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea51862d3b692e42f322f1bdd59cceea630e666 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5021974776866406e+17, + "train_loss": 0.3112027775057237, + "train_runtime": 394.5742, + "train_samples": 6318, + "train_samples_per_second": 48.037, + "train_steps_per_second": 1.505 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0ea51862d3b692e42f322f1bdd59cceea630e666 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.5021974776866406e+17, + "train_loss": 0.3112027775057237, + "train_runtime": 394.5742, + "train_samples": 6318, + "train_samples_per_second": 48.037, + "train_steps_per_second": 1.505 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1588931cfcf09c3a1471a556b6a33240421828d5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 1.9061869382858276, + "learning_rate": 4e-06, + "loss": 1.9597, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.5602107048034668, + "learning_rate": 9e-06, + "loss": 1.8202, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.7158585786819458, + "learning_rate": 1.4e-05, + "loss": 1.7377, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.4812885522842407, + "learning_rate": 1.9e-05, + "loss": 1.612, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.3336032629013062, + "learning_rate": 2.4e-05, + "loss": 1.5549, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.3786550760269165, + "learning_rate": 2.9e-05, + "loss": 1.3869, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.276178240776062, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2671, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.6530488729476929, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1331, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.4882540702819824, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.066, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.5282142162322998, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0162, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.7334626913070679, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.889, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.66727876663208, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8443, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.7484904527664185, + "learning_rate": 2.973179790633317e-05, + "loss": 0.7257, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.8529925346374512, + "learning_rate": 2.964744782530777e-05, + "loss": 0.6847, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.9660626649856567, + "learning_rate": 2.955173677376284e-05, + "loss": 0.7008, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.8111497163772583, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6144, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.8136364221572876, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5656, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.8329651355743408, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5171, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.9824817180633545, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4616, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.563026785850525, + "learning_rate": 2.890567022607206e-05, + "loss": 0.4414, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.8968859910964966, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.393, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.5285414457321167, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.3921, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.9067039489746094, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3985, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 1.7344640493392944, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3659, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.420234203338623, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3432, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.2950036525726318, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3221, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.9601715803146362, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3117, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.9277644157409668, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.2893, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.7262303829193115, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.3049, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.42188560962677, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.2816, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.3298652172088623, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2484, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.434443712234497, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2621, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.0894466638565063, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2377, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.4609211683273315, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2309, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.5908238887786865, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2377, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.351332426071167, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.228, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.5344481468200684, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2122, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.1269736289978027, + "learning_rate": 2.449165826796448e-05, + "loss": 0.218, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.3009496927261353, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2356, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.059199333190918, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1824, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.0177764892578125, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1805, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.0934476852416992, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1899, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.1171568632125854, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1763, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 0.9943208694458008, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1771, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.278827428817749, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1825, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 0.8600234389305115, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1518, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.0938999652862549, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1619, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.349503993988037, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1793, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 0.8902052640914917, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1613, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 0.9741739630699158, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1659, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 1.0536047220230103, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1595, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.9340528845787048, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1608, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 0.6965076923370361, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1512, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.056610345840454, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1589, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 0.7976199388504028, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1478, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.8931915163993835, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1526, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.9223748445510864, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1454, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.909565806388855, + "learning_rate": 1.69164662490578e-05, + "loss": 0.154, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.805518388748169, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1436, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.0353010892868042, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1505, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.7843387126922607, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1307, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.6886826157569885, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1385, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.7356799244880676, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1497, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.6093630790710449, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1405, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.6327088475227356, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1311, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.6402142643928528, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1409, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.7124189138412476, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1406, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.671903133392334, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1324, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.7043671607971191, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1294, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.6634692549705505, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1268, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.6615920066833496, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1214, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.6837024092674255, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1288, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.5877581238746643, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1327, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.674550473690033, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1349, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.6443624496459961, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1255, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.5226499438285828, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1215, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.7179994583129883, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1277, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.5595511198043823, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1316, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.5030422210693359, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1262, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5426155924797058, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1192, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.5918163657188416, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1258, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.5572388172149658, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1187, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.49676188826560974, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1172, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.5085633993148804, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1252, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.4180028438568115, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1111, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.5751785635948181, + "learning_rate": 5.901757461493989e-06, + "loss": 0.124, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.389871746301651, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1133, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.45625385642051697, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1089, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.6761993765830994, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1131, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.6136049032211304, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1214, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.544038712978363, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1146, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.4689734876155853, + "learning_rate": 4.044902040769963e-06, + "loss": 0.1121, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.46388712525367737, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.1207, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.41752922534942627, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1151, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.4803059697151184, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1141, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.502129852771759, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.1129, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.35653620958328247, + "learning_rate": 2.728715183008864e-06, + "loss": 0.106, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.4587688446044922, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1077, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.4375070631504059, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1129, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.49074283242225647, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1131, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.6047240495681763, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1066, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.382255494594574, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.1112, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.3675282895565033, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.1123, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.4314248263835907, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.1175, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.37447261810302734, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.1189, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.49230727553367615, + "learning_rate": 9.724785471955566e-07, + "loss": 0.1073, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.38441258668899536, + "learning_rate": 8.299636258812199e-07, + "loss": 0.1146, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.43436095118522644, + "learning_rate": 6.984393814019885e-07, + "loss": 0.1191, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.4243687391281128, + "learning_rate": 5.780078276432865e-07, + "loss": 0.1104, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.3795343041419983, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.1071, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.4169410169124603, + "learning_rate": 3.707877563706158e-07, + "loss": 0.108, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.3465197682380676, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.1082, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.37261855602264404, + "learning_rate": 2.089461901495715e-07, + "loss": 0.1148, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.5080525279045105, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.107, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.3758437931537628, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1037, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.3598708510398865, + "learning_rate": 5.232782089872601e-08, + "loss": 0.1108, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.3743054270744324, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1009, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.5117127895355225, + "learning_rate": 5.817209927129752e-09, + "loss": 0.1099, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.5021974776866406e+17, + "train_loss": 0.3112027775057237, + "train_runtime": 394.5742, + "train_samples_per_second": 48.037, + "train_steps_per_second": 1.505 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5021974776866406e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8c094999c5be3fffeb84831ca966fa3daf12266 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/1_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77f265a530a23f60bb3edefb35335984cbc458d5044c504014ee6c5722bb34a +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c49f479c22b9165ccda8af96aa93c3b5d89b1693 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 2_128_e3_3e-5 + results: [] +--- + + + +# 2_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..83563d361364133451bc1d9a319dade5d35369b9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "gate_proj", + "o_proj", + "down_proj", + "v_proj", + "up_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a53c0e9f51d794afa37cf27a38f7241ed0c6b32 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f2534ed7243a7e302329e3d55c498e409b8acefee0a1c77c5c760d85ecf48c +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..86140c343f1a279970f19d82dd5c9cbdd2d552cb --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8577037903213363e+17, + "train_loss": 0.3193545310886621, + "train_runtime": 399.0881, + "train_samples": 6319, + "train_samples_per_second": 47.501, + "train_steps_per_second": 1.488 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..86140c343f1a279970f19d82dd5c9cbdd2d552cb --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.8577037903213363e+17, + "train_loss": 0.3193545310886621, + "train_runtime": 399.0881, + "train_samples": 6319, + "train_samples_per_second": 47.501, + "train_steps_per_second": 1.488 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bd72069c90d8141f46867a2ff31d90e48a032909 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 2.002516269683838, + "learning_rate": 4e-06, + "loss": 1.8231, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.649857521057129, + "learning_rate": 9e-06, + "loss": 1.8378, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.5214952230453491, + "learning_rate": 1.4e-05, + "loss": 1.8125, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.7914001941680908, + "learning_rate": 1.9e-05, + "loss": 1.6637, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.9953486323356628, + "learning_rate": 2.4e-05, + "loss": 1.5106, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.3606514930725098, + "learning_rate": 2.9e-05, + "loss": 1.4557, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.3602023124694824, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.391, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.4638868570327759, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.2137, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.3959581851959229, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.126, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.4204320907592773, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0649, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.518527865409851, + "learning_rate": 2.9866162322321703e-05, + "loss": 1.0542, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.707120418548584, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8931, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.4905670881271362, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8059, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.6304559707641602, + "learning_rate": 2.964744782530777e-05, + "loss": 0.7625, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.6033687591552734, + "learning_rate": 2.955173677376284e-05, + "loss": 0.783, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 2.3308515548706055, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.687, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.6521958112716675, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.6499, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.6817365884780884, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.6356, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.7427502870559692, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.5627, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.895095944404602, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5066, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.6610841751098633, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4636, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.9132581949234009, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4336, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.6124558448791504, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3997, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 1.924505591392517, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3919, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.6724390983581543, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3872, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.5458909273147583, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3257, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.8567590713500977, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3412, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.734501838684082, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3097, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.6966663599014282, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.3515, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.504404902458191, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3308, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.8258802890777588, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.3139, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.5246692895889282, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2865, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.518206000328064, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2993, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.9520102739334106, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2516, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.5437290668487549, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2721, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 2.0806777477264404, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2603, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.376752495765686, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2319, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.5539520978927612, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2094, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.4395561218261719, + "learning_rate": 2.416452950136248e-05, + "loss": 0.228, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.1388119459152222, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1919, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.316052794456482, + "learning_rate": 2.348920641495893e-05, + "loss": 0.185, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.4365193843841553, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1828, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.2865010499954224, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1897, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 0.9742973446846008, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1496, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.1967049837112427, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1688, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.3256216049194336, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1723, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.1585544347763062, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1832, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 0.8748406767845154, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1538, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.126293659210205, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1463, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.1821399927139282, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1555, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 1.101980447769165, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1547, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.8311046361923218, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1519, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 1.0522958040237427, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1504, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.071820616722107, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1586, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 0.8881751894950867, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1355, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.6197173595428467, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1466, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.8258532285690308, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1378, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.8648748993873596, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1417, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.828565239906311, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1402, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 0.9346043467521667, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1279, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 1.4877666234970093, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1393, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.8524649739265442, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1358, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.9757581949234009, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1383, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.9850208759307861, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1388, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 1.1285622119903564, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1199, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.7264949083328247, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1241, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.6368124485015869, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1217, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 1.0692720413208008, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1261, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.840882420539856, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1228, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.7021074295043945, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1154, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.6899158358573914, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1174, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.5609990358352661, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1372, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.6572065949440002, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1189, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.6720581650733948, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1222, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.7419654130935669, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1168, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.44448477029800415, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1033, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 1.464971661567688, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1159, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.8036788105964661, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1155, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.6747683882713318, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1051, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.6806677579879761, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1233, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.5269436240196228, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1071, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.5295878648757935, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1059, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.7095851302146912, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1115, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.471158891916275, + "learning_rate": 6.579809488801994e-06, + "loss": 0.103, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.5723422765731812, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1078, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.5698457956314087, + "learning_rate": 5.901757461493989e-06, + "loss": 0.1061, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.6177956461906433, + "learning_rate": 5.573186564064649e-06, + "loss": 0.099, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.4102683365345001, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1006, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.44732460379600525, + "learning_rate": 4.938229047911652e-06, + "loss": 0.0917, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.500673770904541, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1048, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.5484961271286011, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1018, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.5635212063789368, + "learning_rate": 4.044902040769963e-06, + "loss": 0.0969, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.5047913193702698, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.0998, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.6078349351882935, + "learning_rate": 3.491450893410134e-06, + "loss": 0.0981, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.35562747716903687, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.092, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.4682256281375885, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.094, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.5176949501037598, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1003, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.6243091225624084, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.0894, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.402149498462677, + "learning_rate": 2.267485570730894e-06, + "loss": 0.0941, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.4452066123485565, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.0996, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.47796857357025146, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.0963, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.6449517011642456, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0995, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.5395673513412476, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0939, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.3867981433868408, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0951, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.4024958312511444, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0964, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.4273724853992462, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0974, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.4772111177444458, + "learning_rate": 8.299636258812199e-07, + "loss": 0.0953, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.4828636646270752, + "learning_rate": 6.984393814019885e-07, + "loss": 0.0977, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.4697018265724182, + "learning_rate": 5.780078276432865e-07, + "loss": 0.0967, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.38666704297065735, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0922, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.5693273544311523, + "learning_rate": 3.707877563706158e-07, + "loss": 0.0981, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.4001903831958771, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0989, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.47257307171821594, + "learning_rate": 2.089461901495715e-07, + "loss": 0.095, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.3343694508075714, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0924, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.4973941743373871, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1016, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.4095208942890167, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0948, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.448032945394516, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1013, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.5051411390304565, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0938, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.8577037903213363e+17, + "train_loss": 0.3193545310886621, + "train_runtime": 399.0881, + "train_samples_per_second": 47.501, + "train_steps_per_second": 1.488 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.8577037903213363e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0eab3abcc93f9128bb428df2bf5027a6bd88c413 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/2_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4f54a37f3599cdcb5fc18fc1dacf79db0f3e73d8931fa1df31f7932607e9ea +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..66d73ebfa4bde68148ac30533264ac0b7b4fa07e --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 3_128_e3_3e-5 + results: [] +--- + + + +# 3_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..af6234385389d0a30465c70ee5d35cb4f24ea5c3 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "gate_proj", + "down_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b68020f91ec5c4732deb27f2e9da265a06ce4a1d --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1394bdd6387c9434b810e7b99eba0d94b891849fe3fbc46891d535d37e578e +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e39c40b5231190fb0dee824211045b3037a5bd9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7093599182402355e+17, + "train_loss": 0.3059398982962374, + "train_runtime": 394.5995, + "train_samples": 6314, + "train_samples_per_second": 48.003, + "train_steps_per_second": 1.505 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e39c40b5231190fb0dee824211045b3037a5bd9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7093599182402355e+17, + "train_loss": 0.3059398982962374, + "train_runtime": 394.5995, + "train_samples": 6314, + "train_samples_per_second": 48.003, + "train_steps_per_second": 1.505 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b2707697e3e70688f5c6d83e1ce4f3deecb306dc --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 2.1562540531158447, + "learning_rate": 4e-06, + "loss": 1.8254, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.6545473337173462, + "learning_rate": 9e-06, + "loss": 1.7885, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.3013375997543335, + "learning_rate": 1.4e-05, + "loss": 1.7313, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.3056925535202026, + "learning_rate": 1.9e-05, + "loss": 1.6158, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.190002679824829, + "learning_rate": 2.4e-05, + "loss": 1.5106, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.4247568845748901, + "learning_rate": 2.9e-05, + "loss": 1.4034, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.339274525642395, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2431, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.4095524549484253, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1647, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.7350642681121826, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1018, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.340848445892334, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0453, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.6519027948379517, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.9215, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.4725686311721802, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8229, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.6548978090286255, + "learning_rate": 2.973179790633317e-05, + "loss": 0.7822, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.9303576946258545, + "learning_rate": 2.964744782530777e-05, + "loss": 0.7431, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 2.0301804542541504, + "learning_rate": 2.955173677376284e-05, + "loss": 0.6742, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.730038046836853, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.592, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.7500624656677246, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5319, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.5946540832519531, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.515, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.6962826251983643, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4732, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.9455208778381348, + "learning_rate": 2.890567022607206e-05, + "loss": 0.4312, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.6911556720733643, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.495, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 2.0076141357421875, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.3955, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.4360873699188232, + "learning_rate": 2.838778253789822e-05, + "loss": 0.4061, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 1.5890793800354004, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3475, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.5185612440109253, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3473, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.4125462770462036, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3377, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.883421778678894, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3227, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.3061182498931885, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.2955, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.7235853672027588, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.287, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 2.0008676052093506, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3049, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.4427059888839722, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2595, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.4125834703445435, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2271, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.242905616760254, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2492, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.1471246480941772, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2166, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.3648968935012817, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2318, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.4676387310028076, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.218, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.6451185941696167, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.24, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.069006323814392, + "learning_rate": 2.449165826796448e-05, + "loss": 0.1969, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.4578324556350708, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2034, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.1442625522613525, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1799, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.036299705505371, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1745, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.1312758922576904, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1657, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.1621540784835815, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1646, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.100042700767517, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1686, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.017505168914795, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.163, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 0.9852712750434875, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1609, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 0.9172077178955078, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1596, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 0.8619810938835144, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1503, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.0919078588485718, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1688, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.0963231325149536, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1866, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 0.8398768305778503, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1652, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.6848553419113159, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1596, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 0.9217065572738647, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1452, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 1.0225164890289307, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1555, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 1.139595866203308, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1408, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.753288984298706, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1326, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.7644430994987488, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1759, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.8440877199172974, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1477, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.7954363226890564, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1639, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 0.6868990659713745, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1378, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.9243471026420593, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1342, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.8142603635787964, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1313, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.8507310748100281, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1444, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.8170554041862488, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1319, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.7943320870399475, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1226, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.8410393595695496, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1327, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.845255434513092, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1326, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 1.2915281057357788, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1213, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.6749197244644165, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1238, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.7248027920722961, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1327, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.5806834101676941, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1197, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.7702064514160156, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1207, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.6646297574043274, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1197, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.6011528968811035, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1205, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.5142061114311218, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1287, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.41029369831085205, + "learning_rate": 9.53099740988206e-06, + "loss": 0.121, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.47006338834762573, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1151, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.5823071002960205, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1233, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.7716848254203796, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1184, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5094692707061768, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1044, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.5581040978431702, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1054, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.4306275248527527, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.11, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.4816688597202301, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1072, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.4921349585056305, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1138, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.46835899353027344, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1079, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.4173777997493744, + "learning_rate": 5.901757461493989e-06, + "loss": 0.1085, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.4964480996131897, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1134, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.5015546679496765, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1121, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.5558993220329285, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1037, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.393706738948822, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1064, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.5425444841384888, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1059, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.44033050537109375, + "learning_rate": 4.044902040769963e-06, + "loss": 0.0983, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.3397301137447357, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.0969, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.4087347686290741, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1075, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.33908811211586, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1008, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.33232635259628296, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.0937, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.4118949770927429, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1039, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.46213337779045105, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1068, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.4858943223953247, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1043, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.345076322555542, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1026, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.48015856742858887, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1036, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.38156649470329285, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0974, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.35295358300209045, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0967, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.39833804965019226, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.1052, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.3993915617465973, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0967, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.3908717930316925, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0983, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.38738128542900085, + "learning_rate": 8.299636258812199e-07, + "loss": 0.1035, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.3662972152233124, + "learning_rate": 6.984393814019885e-07, + "loss": 0.0993, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.3754114806652069, + "learning_rate": 5.780078276432865e-07, + "loss": 0.101, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.373713880777359, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0977, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.5567549467086792, + "learning_rate": 3.707877563706158e-07, + "loss": 0.1086, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.3375025987625122, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0953, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.37593477964401245, + "learning_rate": 2.089461901495715e-07, + "loss": 0.0941, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.45855438709259033, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0963, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.3653884530067444, + "learning_rate": 9.298514701147898e-08, + "loss": 0.1017, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.3625771701335907, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0967, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.4073554575443268, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1007, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.3815722167491913, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0999, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.7093599182402355e+17, + "train_loss": 0.3059398982962374, + "train_runtime": 394.5995, + "train_samples_per_second": 48.003, + "train_steps_per_second": 1.505 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7093599182402355e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2f6f3b7f94823d3efb1e8371fd5cdd3e74e97d8c --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/3_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f14d26c914f110fe866c52061dbc57ca4897d766f3cc120f1fdad4f665e50a0 +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..91798747a24b023f9d9c869debe13a80e2f41833 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 4_128_e3_3e-5 + results: [] +--- + + + +# 4_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..691df4bc32c1d66ccaa7a2c20a03d26219eaf41b --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "v_proj", + "down_proj", + "q_proj", + "k_proj", + "o_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57e10f9896bf0d7fa39626990d922415fe47450e --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b7c46e7e39c2db210a11121318e27da1e786dcc53d4207223ef785e35ca813 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ff37e3513162e717eb2468d80d36cc96977ae19e --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7976253992245658e+17, + "train_loss": 0.30435471833755673, + "train_runtime": 388.532, + "train_samples": 6308, + "train_samples_per_second": 48.706, + "train_steps_per_second": 1.529 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ff37e3513162e717eb2468d80d36cc96977ae19e --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.7976253992245658e+17, + "train_loss": 0.30435471833755673, + "train_runtime": 388.532, + "train_samples": 6308, + "train_samples_per_second": 48.706, + "train_steps_per_second": 1.529 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d874d1c720cafc589d82ece7682618c5583ce4b2 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.025348542458808618, + "grad_norm": 1.6561226844787598, + "learning_rate": 4e-06, + "loss": 1.7935, + "step": 5 + }, + { + "epoch": 0.050697084917617236, + "grad_norm": 1.523027777671814, + "learning_rate": 9e-06, + "loss": 1.7958, + "step": 10 + }, + { + "epoch": 0.07604562737642585, + "grad_norm": 1.664865493774414, + "learning_rate": 1.4e-05, + "loss": 1.7051, + "step": 15 + }, + { + "epoch": 0.10139416983523447, + "grad_norm": 1.2727614641189575, + "learning_rate": 1.9e-05, + "loss": 1.5141, + "step": 20 + }, + { + "epoch": 0.1267427122940431, + "grad_norm": 1.4189029932022095, + "learning_rate": 2.4e-05, + "loss": 1.3921, + "step": 25 + }, + { + "epoch": 0.1520912547528517, + "grad_norm": 1.0927263498306274, + "learning_rate": 2.9e-05, + "loss": 1.4349, + "step": 30 + }, + { + "epoch": 0.17743979721166034, + "grad_norm": 1.4581407308578491, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2722, + "step": 35 + }, + { + "epoch": 0.20278833967046894, + "grad_norm": 1.5585311651229858, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.196, + "step": 40 + }, + { + "epoch": 0.22813688212927757, + "grad_norm": 1.5323458909988403, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.0677, + "step": 45 + }, + { + "epoch": 0.2534854245880862, + "grad_norm": 1.4341201782226562, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0109, + "step": 50 + }, + { + "epoch": 0.2788339670468948, + "grad_norm": 1.6387184858322144, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.938, + "step": 55 + }, + { + "epoch": 0.3041825095057034, + "grad_norm": 1.7413619756698608, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8508, + "step": 60 + }, + { + "epoch": 0.32953105196451205, + "grad_norm": 1.5124279260635376, + "learning_rate": 2.973179790633317e-05, + "loss": 0.81, + "step": 65 + }, + { + "epoch": 0.3548795944233207, + "grad_norm": 1.6984989643096924, + "learning_rate": 2.964744782530777e-05, + "loss": 0.6767, + "step": 70 + }, + { + "epoch": 0.38022813688212925, + "grad_norm": 1.6731104850769043, + "learning_rate": 2.955173677376284e-05, + "loss": 0.6858, + "step": 75 + }, + { + "epoch": 0.4055766793409379, + "grad_norm": 2.0132181644439697, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6941, + "step": 80 + }, + { + "epoch": 0.4309252217997465, + "grad_norm": 2.153615713119507, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.6123, + "step": 85 + }, + { + "epoch": 0.45627376425855515, + "grad_norm": 1.8144164085388184, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5121, + "step": 90 + }, + { + "epoch": 0.4816223067173637, + "grad_norm": 1.9865628480911255, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4792, + "step": 95 + }, + { + "epoch": 0.5069708491761724, + "grad_norm": 1.8175711631774902, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5123, + "step": 100 + }, + { + "epoch": 0.532319391634981, + "grad_norm": 1.613901138305664, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4531, + "step": 105 + }, + { + "epoch": 0.5576679340937896, + "grad_norm": 1.5604621171951294, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4312, + "step": 110 + }, + { + "epoch": 0.5830164765525983, + "grad_norm": 1.5780329704284668, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3842, + "step": 115 + }, + { + "epoch": 0.6083650190114068, + "grad_norm": 1.5390725135803223, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3674, + "step": 120 + }, + { + "epoch": 0.6337135614702155, + "grad_norm": 1.7650567293167114, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3785, + "step": 125 + }, + { + "epoch": 0.6590621039290241, + "grad_norm": 1.4897276163101196, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3501, + "step": 130 + }, + { + "epoch": 0.6844106463878327, + "grad_norm": 1.7872884273529053, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3024, + "step": 135 + }, + { + "epoch": 0.7097591888466414, + "grad_norm": 1.550950527191162, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3183, + "step": 140 + }, + { + "epoch": 0.7351077313054499, + "grad_norm": 1.621933102607727, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.2971, + "step": 145 + }, + { + "epoch": 0.7604562737642585, + "grad_norm": 1.6464223861694336, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.2892, + "step": 150 + }, + { + "epoch": 0.7858048162230672, + "grad_norm": 1.3703575134277344, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2693, + "step": 155 + }, + { + "epoch": 0.8111533586818758, + "grad_norm": 1.511803388595581, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2689, + "step": 160 + }, + { + "epoch": 0.8365019011406845, + "grad_norm": 1.404079556465149, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2433, + "step": 165 + }, + { + "epoch": 0.861850443599493, + "grad_norm": 1.5329846143722534, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2591, + "step": 170 + }, + { + "epoch": 0.8871989860583016, + "grad_norm": 1.5939494371414185, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2453, + "step": 175 + }, + { + "epoch": 0.9125475285171103, + "grad_norm": 1.6148059368133545, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2437, + "step": 180 + }, + { + "epoch": 0.9378960709759189, + "grad_norm": 1.2476797103881836, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2193, + "step": 185 + }, + { + "epoch": 0.9632446134347274, + "grad_norm": 1.076555848121643, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2123, + "step": 190 + }, + { + "epoch": 0.9885931558935361, + "grad_norm": 1.3888609409332275, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2219, + "step": 195 + }, + { + "epoch": 1.0101394169835234, + "grad_norm": 1.4178423881530762, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1923, + "step": 200 + }, + { + "epoch": 1.035487959442332, + "grad_norm": 1.1423120498657227, + "learning_rate": 2.348920641495893e-05, + "loss": 0.185, + "step": 205 + }, + { + "epoch": 1.0608365019011408, + "grad_norm": 1.3319388628005981, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1677, + "step": 210 + }, + { + "epoch": 1.0861850443599492, + "grad_norm": 1.088462233543396, + "learning_rate": 2.278755057120863e-05, + "loss": 0.185, + "step": 215 + }, + { + "epoch": 1.111533586818758, + "grad_norm": 0.8259502649307251, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1583, + "step": 220 + }, + { + "epoch": 1.1368821292775666, + "grad_norm": 1.4107177257537842, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.179, + "step": 225 + }, + { + "epoch": 1.162230671736375, + "grad_norm": 1.1114238500595093, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1597, + "step": 230 + }, + { + "epoch": 1.1875792141951838, + "grad_norm": 0.8854601979255676, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1674, + "step": 235 + }, + { + "epoch": 1.2129277566539924, + "grad_norm": 1.0892236232757568, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.149, + "step": 240 + }, + { + "epoch": 1.2382762991128011, + "grad_norm": 0.9785470962524414, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1409, + "step": 245 + }, + { + "epoch": 1.2636248415716096, + "grad_norm": 0.8894004821777344, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1473, + "step": 250 + }, + { + "epoch": 1.2889733840304183, + "grad_norm": 1.1719945669174194, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1512, + "step": 255 + }, + { + "epoch": 1.3143219264892267, + "grad_norm": 0.9404357671737671, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1402, + "step": 260 + }, + { + "epoch": 1.3396704689480354, + "grad_norm": 0.8582985401153564, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1443, + "step": 265 + }, + { + "epoch": 1.3650190114068441, + "grad_norm": 1.0110599994659424, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1455, + "step": 270 + }, + { + "epoch": 1.3903675538656528, + "grad_norm": 1.1634562015533447, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1486, + "step": 275 + }, + { + "epoch": 1.4157160963244613, + "grad_norm": 0.866890013217926, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.136, + "step": 280 + }, + { + "epoch": 1.44106463878327, + "grad_norm": 0.7105090618133545, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1379, + "step": 285 + }, + { + "epoch": 1.4664131812420786, + "grad_norm": 0.7500168681144714, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1355, + "step": 290 + }, + { + "epoch": 1.491761723700887, + "grad_norm": 0.9626094698905945, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1347, + "step": 295 + }, + { + "epoch": 1.5171102661596958, + "grad_norm": 0.8724218606948853, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1279, + "step": 300 + }, + { + "epoch": 1.5424588086185045, + "grad_norm": 0.9358634948730469, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1412, + "step": 305 + }, + { + "epoch": 1.5678073510773132, + "grad_norm": 0.6705332398414612, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1299, + "step": 310 + }, + { + "epoch": 1.5931558935361216, + "grad_norm": 0.7313225269317627, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1281, + "step": 315 + }, + { + "epoch": 1.6185044359949303, + "grad_norm": 0.7642185091972351, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.1208, + "step": 320 + }, + { + "epoch": 1.6438529784537388, + "grad_norm": 1.3064942359924316, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1254, + "step": 325 + }, + { + "epoch": 1.6692015209125475, + "grad_norm": 0.6972230672836304, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1197, + "step": 330 + }, + { + "epoch": 1.6945500633713562, + "grad_norm": 0.9306526184082031, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1259, + "step": 335 + }, + { + "epoch": 1.7198986058301649, + "grad_norm": 0.5681966543197632, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1127, + "step": 340 + }, + { + "epoch": 1.7452471482889735, + "grad_norm": 0.8996943235397339, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1225, + "step": 345 + }, + { + "epoch": 1.770595690747782, + "grad_norm": 0.951020359992981, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1251, + "step": 350 + }, + { + "epoch": 1.7959442332065905, + "grad_norm": 0.6577113270759583, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1264, + "step": 355 + }, + { + "epoch": 1.8212927756653992, + "grad_norm": 0.8777438998222351, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1224, + "step": 360 + }, + { + "epoch": 1.8466413181242078, + "grad_norm": 0.6046962141990662, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1133, + "step": 365 + }, + { + "epoch": 1.8719898605830165, + "grad_norm": 0.44175079464912415, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1065, + "step": 370 + }, + { + "epoch": 1.8973384030418252, + "grad_norm": 0.6563915610313416, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1256, + "step": 375 + }, + { + "epoch": 1.9226869455006337, + "grad_norm": 0.6005930304527283, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1076, + "step": 380 + }, + { + "epoch": 1.9480354879594424, + "grad_norm": 0.5279455184936523, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1198, + "step": 385 + }, + { + "epoch": 1.9733840304182508, + "grad_norm": 0.6069126725196838, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1045, + "step": 390 + }, + { + "epoch": 1.9987325728770595, + "grad_norm": 0.7707761526107788, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1097, + "step": 395 + }, + { + "epoch": 2.0202788339670468, + "grad_norm": 0.44990038871765137, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1172, + "step": 400 + }, + { + "epoch": 2.0456273764258555, + "grad_norm": 0.45071685314178467, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1075, + "step": 405 + }, + { + "epoch": 2.070975918884664, + "grad_norm": 0.4331156015396118, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.0967, + "step": 410 + }, + { + "epoch": 2.096324461343473, + "grad_norm": 0.4310593903064728, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1032, + "step": 415 + }, + { + "epoch": 2.1216730038022815, + "grad_norm": 0.3920035660266876, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1027, + "step": 420 + }, + { + "epoch": 2.14702154626109, + "grad_norm": 0.4757051467895508, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1072, + "step": 425 + }, + { + "epoch": 2.1723700887198985, + "grad_norm": 0.6130903363227844, + "learning_rate": 5.901757461493989e-06, + "loss": 0.0988, + "step": 430 + }, + { + "epoch": 2.197718631178707, + "grad_norm": 0.4754129946231842, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1043, + "step": 435 + }, + { + "epoch": 2.223067173637516, + "grad_norm": 0.41680026054382324, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1059, + "step": 440 + }, + { + "epoch": 2.2484157160963245, + "grad_norm": 0.6718032956123352, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1067, + "step": 445 + }, + { + "epoch": 2.273764258555133, + "grad_norm": 0.4084990620613098, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.0999, + "step": 450 + }, + { + "epoch": 2.299112801013942, + "grad_norm": 0.3744699954986572, + "learning_rate": 4.334482243338589e-06, + "loss": 0.0944, + "step": 455 + }, + { + "epoch": 2.32446134347275, + "grad_norm": 0.4577121138572693, + "learning_rate": 4.044902040769963e-06, + "loss": 0.0941, + "step": 460 + }, + { + "epoch": 2.349809885931559, + "grad_norm": 0.374050498008728, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.0953, + "step": 465 + }, + { + "epoch": 2.3751584283903675, + "grad_norm": 0.6266903281211853, + "learning_rate": 3.491450893410134e-06, + "loss": 0.1047, + "step": 470 + }, + { + "epoch": 2.400506970849176, + "grad_norm": 0.38013356924057007, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.1043, + "step": 475 + }, + { + "epoch": 2.425855513307985, + "grad_norm": 0.4865313768386841, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.0982, + "step": 480 + }, + { + "epoch": 2.4512040557667936, + "grad_norm": 0.42500025033950806, + "learning_rate": 2.728715183008864e-06, + "loss": 0.0972, + "step": 485 + }, + { + "epoch": 2.4765525982256023, + "grad_norm": 0.5390872359275818, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.1007, + "step": 490 + }, + { + "epoch": 2.5019011406844105, + "grad_norm": 0.4385237693786621, + "learning_rate": 2.267485570730894e-06, + "loss": 0.096, + "step": 495 + }, + { + "epoch": 2.527249683143219, + "grad_norm": 0.41958874464035034, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.0973, + "step": 500 + }, + { + "epoch": 2.552598225602028, + "grad_norm": 0.44675490260124207, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1003, + "step": 505 + }, + { + "epoch": 2.5779467680608366, + "grad_norm": 0.5456805229187012, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.0969, + "step": 510 + }, + { + "epoch": 2.6032953105196452, + "grad_norm": 0.36848530173301697, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0937, + "step": 515 + }, + { + "epoch": 2.6286438529784535, + "grad_norm": 0.5201994776725769, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0925, + "step": 520 + }, + { + "epoch": 2.653992395437262, + "grad_norm": 0.6071819067001343, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.1029, + "step": 525 + }, + { + "epoch": 2.679340937896071, + "grad_norm": 0.5291131734848022, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0932, + "step": 530 + }, + { + "epoch": 2.7046894803548795, + "grad_norm": 0.4224088788032532, + "learning_rate": 8.299636258812199e-07, + "loss": 0.0955, + "step": 535 + }, + { + "epoch": 2.7300380228136882, + "grad_norm": 0.3658817410469055, + "learning_rate": 6.984393814019885e-07, + "loss": 0.09, + "step": 540 + }, + { + "epoch": 2.755386565272497, + "grad_norm": 0.440145879983902, + "learning_rate": 5.780078276432865e-07, + "loss": 0.1025, + "step": 545 + }, + { + "epoch": 2.7807351077313056, + "grad_norm": 0.38738197088241577, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0986, + "step": 550 + }, + { + "epoch": 2.8060836501901143, + "grad_norm": 0.4406212270259857, + "learning_rate": 3.707877563706158e-07, + "loss": 0.1, + "step": 555 + }, + { + "epoch": 2.8314321926489225, + "grad_norm": 0.394199401140213, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0959, + "step": 560 + }, + { + "epoch": 2.8567807351077312, + "grad_norm": 0.37208065390586853, + "learning_rate": 2.089461901495715e-07, + "loss": 0.096, + "step": 565 + }, + { + "epoch": 2.88212927756654, + "grad_norm": 0.3883321285247803, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0902, + "step": 570 + }, + { + "epoch": 2.9074778200253486, + "grad_norm": 0.38405996561050415, + "learning_rate": 9.298514701147898e-08, + "loss": 0.0846, + "step": 575 + }, + { + "epoch": 2.9328263624841573, + "grad_norm": 0.3878246545791626, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0904, + "step": 580 + }, + { + "epoch": 2.9581749049429655, + "grad_norm": 0.39115506410598755, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.0948, + "step": 585 + }, + { + "epoch": 2.983523447401774, + "grad_norm": 0.3717835545539856, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0919, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.7976253992245658e+17, + "train_loss": 0.30435471833755673, + "train_runtime": 388.532, + "train_samples_per_second": 48.706, + "train_steps_per_second": 1.529 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7976253992245658e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..36d4020b747e060353741c8916991c48c334f306 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/4_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f076dce72c741a3e03becf00bdabdb52320eec421c62e5c13079afaca932a6be +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b41dd3b592b5730c04c2281bb93f8913a74cd4b3 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 5_128_e3_3e-5 + results: [] +--- + + + +# 5_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fcbb7477e7915d99343f8a22083a950045a6c394 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6f0c74c0cbf3e08feb2fc7354a9db88742e0820 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26a01e06c7eab661d2f91f6a26165bbfa81c70eca28e6c6335165bc09801d1b +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c603503e1bca84e054cce6588fa2335b79d57709 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.737476473356288e+17, + "train_loss": 0.3038629004951278, + "train_runtime": 391.2563, + "train_samples": 6318, + "train_samples_per_second": 48.444, + "train_steps_per_second": 1.518 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c603503e1bca84e054cce6588fa2335b79d57709 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.737476473356288e+17, + "train_loss": 0.3038629004951278, + "train_runtime": 391.2563, + "train_samples": 6318, + "train_samples_per_second": 48.444, + "train_steps_per_second": 1.518 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dcb2e09f319ef620e3b4a427038b9811b0b75bd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 2.1971282958984375, + "learning_rate": 4e-06, + "loss": 1.8736, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.814112663269043, + "learning_rate": 9e-06, + "loss": 1.7416, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.4835845232009888, + "learning_rate": 1.4e-05, + "loss": 1.7622, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.0956412553787231, + "learning_rate": 1.9e-05, + "loss": 1.6261, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.455289602279663, + "learning_rate": 2.4e-05, + "loss": 1.5818, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.185299038887024, + "learning_rate": 2.9e-05, + "loss": 1.4273, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.4258959293365479, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.2631, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.401149034500122, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.1858, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.5403932332992554, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1888, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.3796842098236084, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0275, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.5719767808914185, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.975, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.8190611600875854, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8459, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.740968942642212, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8039, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 2.073073148727417, + "learning_rate": 2.964744782530777e-05, + "loss": 0.6807, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.789018988609314, + "learning_rate": 2.955173677376284e-05, + "loss": 0.7348, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.666628122329712, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6286, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.7373601198196411, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5884, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.5950886011123657, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5026, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.8034368753433228, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.4618, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.7091020345687866, + "learning_rate": 2.890567022607206e-05, + "loss": 0.4917, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.8915801048278809, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4168, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.5468560457229614, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4008, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.8026851415634155, + "learning_rate": 2.838778253789822e-05, + "loss": 0.381, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 2.051422595977783, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3561, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.482897400856018, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3502, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.6578572988510132, + "learning_rate": 2.777648771814114e-05, + "loss": 0.2828, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.5223747491836548, + "learning_rate": 2.7552684578024e-05, + "loss": 0.2929, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 2.1412177085876465, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3123, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.6944425106048584, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.281, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.4856200218200684, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.2636, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 1.6004363298416138, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2792, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.3828482627868652, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2596, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.426095724105835, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2265, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.8577830791473389, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2298, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.3018509149551392, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2179, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.4330244064331055, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2192, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.4108957052230835, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.2197, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.6099053621292114, + "learning_rate": 2.449165826796448e-05, + "loss": 0.1976, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.3250007629394531, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2126, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.4380967617034912, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1763, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.105892539024353, + "learning_rate": 2.348920641495893e-05, + "loss": 0.1646, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.4180781841278076, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1978, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.0595381259918213, + "learning_rate": 2.278755057120863e-05, + "loss": 0.151, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.137956976890564, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1661, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 1.2401458024978638, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.162, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.01978600025177, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1421, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 0.9699510335922241, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1437, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 1.1067572832107544, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1607, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.236065149307251, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1625, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 0.8749372363090515, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1577, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 0.9473256468772888, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1509, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 1.068051815032959, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1395, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 1.1544103622436523, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.1455, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 0.8401674032211304, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1406, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 1.1042360067367554, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1369, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 1.0136735439300537, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1358, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.8991892337799072, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1379, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 0.9097805023193359, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1404, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.8734955191612244, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1326, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 0.8909756541252136, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1314, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 0.8816075325012207, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1287, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.7870368361473083, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1236, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.7812661528587341, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1261, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.6554967761039734, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.123, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.8096292614936829, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1205, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.6922826766967773, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1194, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.915884256362915, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1291, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.71278315782547, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1213, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.5872833132743835, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1195, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.6612381339073181, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1168, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.8989573121070862, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1208, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.7457563281059265, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1149, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.5976019501686096, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.1139, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.6130596399307251, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1163, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.4582606554031372, + "learning_rate": 9.922075858704368e-06, + "loss": 0.1021, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.5133168697357178, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1141, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.535451352596283, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1125, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.6372042298316956, + "learning_rate": 8.761866287946955e-06, + "loss": 0.109, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.6467808485031128, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1039, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.805828869342804, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1008, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.5690171122550964, + "learning_rate": 7.64518044300642e-06, + "loss": 0.0985, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.7584823369979858, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1047, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.4808284044265747, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1034, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.48702186346054077, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1021, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.8792144060134888, + "learning_rate": 6.237385210498588e-06, + "loss": 0.0924, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.6091505289077759, + "learning_rate": 5.901757461493989e-06, + "loss": 0.1054, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.3770187497138977, + "learning_rate": 5.573186564064649e-06, + "loss": 0.0959, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.46261662244796753, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1039, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.4640849232673645, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1068, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.425327330827713, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.1111, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.3971248269081116, + "learning_rate": 4.334482243338589e-06, + "loss": 0.0916, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.4024830460548401, + "learning_rate": 4.044902040769963e-06, + "loss": 0.0999, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.4534810185432434, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.0963, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.48523062467575073, + "learning_rate": 3.491450893410134e-06, + "loss": 0.0979, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.4905582666397095, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.0997, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.5949429273605347, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.0931, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.4610997140407562, + "learning_rate": 2.728715183008864e-06, + "loss": 0.0994, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.3820766806602478, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.0925, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.40432220697402954, + "learning_rate": 2.267485570730894e-06, + "loss": 0.0968, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.3597314953804016, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.0912, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.3629218637943268, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.0967, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.500973641872406, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.1065, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.42169755697250366, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.099, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.4272483289241791, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.0972, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.41805440187454224, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0916, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.50019371509552, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0999, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.6074540615081787, + "learning_rate": 8.299636258812199e-07, + "loss": 0.0961, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.4149613082408905, + "learning_rate": 6.984393814019885e-07, + "loss": 0.0943, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.466071218252182, + "learning_rate": 5.780078276432865e-07, + "loss": 0.0983, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.483717679977417, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.0943, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.40139275789260864, + "learning_rate": 3.707877563706158e-07, + "loss": 0.0942, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.46539705991744995, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.0973, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.46307697892189026, + "learning_rate": 2.089461901495715e-07, + "loss": 0.1009, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.4313168227672577, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.101, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.5104858875274658, + "learning_rate": 9.298514701147898e-08, + "loss": 0.0924, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.4231293499469757, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0961, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.3676932752132416, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.0957, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.4017404615879059, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0912, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.737476473356288e+17, + "train_loss": 0.3038629004951278, + "train_runtime": 391.2563, + "train_samples_per_second": 48.444, + "train_steps_per_second": 1.518 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.737476473356288e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b876579d8d9b1743cf5d0d1dbccc90544af9da06 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/5_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7e23f2a0808228dcdeadbabe3b606f738caba94fa3656c48e83d07c117bebb +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..10a0171c95957c3bada9a63f8d2e30d2e1337853 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 6_128_e3_3e-5 + results: [] +--- + + + +# 6_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..891e61bfba0e36f1711986251034975efef832bf --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "gate_proj", + "down_proj", + "o_proj", + "v_proj", + "q_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33574cea90d2f3697490e9b05541dd15b1e1830e --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba4f663522e0ae8d1f308ed44ee8b2cf7d5869536e9266aac340a31af40d911 +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0bb801cc7e1f3ecd67b8e147514d179e60bfa309 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.760462524949463e+17, + "train_loss": 0.30572376009607233, + "train_runtime": 399.9587, + "train_samples": 6348, + "train_samples_per_second": 47.615, + "train_steps_per_second": 1.493 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0bb801cc7e1f3ecd67b8e147514d179e60bfa309 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.760462524949463e+17, + "train_loss": 0.30572376009607233, + "train_runtime": 399.9587, + "train_samples": 6348, + "train_samples_per_second": 47.615, + "train_steps_per_second": 1.493 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..df2b9553ea01c6b15cd82829b0f222e5754ef136 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/trainer_state.json @@ -0,0 +1,876 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 597, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02518891687657431, + "grad_norm": 1.658461332321167, + "learning_rate": 4e-06, + "loss": 1.8502, + "step": 5 + }, + { + "epoch": 0.05037783375314862, + "grad_norm": 1.6382395029067993, + "learning_rate": 9e-06, + "loss": 1.8475, + "step": 10 + }, + { + "epoch": 0.07556675062972293, + "grad_norm": 1.4018137454986572, + "learning_rate": 1.4e-05, + "loss": 1.7231, + "step": 15 + }, + { + "epoch": 0.10075566750629723, + "grad_norm": 1.3492830991744995, + "learning_rate": 1.9e-05, + "loss": 1.575, + "step": 20 + }, + { + "epoch": 0.12594458438287154, + "grad_norm": 1.1412041187286377, + "learning_rate": 2.4e-05, + "loss": 1.4893, + "step": 25 + }, + { + "epoch": 0.15113350125944586, + "grad_norm": 1.1831309795379639, + "learning_rate": 2.9e-05, + "loss": 1.3453, + "step": 30 + }, + { + "epoch": 0.17632241813602015, + "grad_norm": 1.222689151763916, + "learning_rate": 2.9996316191067322e-05, + "loss": 1.309, + "step": 35 + }, + { + "epoch": 0.20151133501259447, + "grad_norm": 1.1910148859024048, + "learning_rate": 2.9981353818283835e-05, + "loss": 1.1645, + "step": 40 + }, + { + "epoch": 0.22670025188916876, + "grad_norm": 1.1890814304351807, + "learning_rate": 2.995489411751688e-05, + "loss": 1.0744, + "step": 45 + }, + { + "epoch": 0.2518891687657431, + "grad_norm": 1.5773613452911377, + "learning_rate": 2.9916957395065996e-05, + "loss": 0.9859, + "step": 50 + }, + { + "epoch": 0.2770780856423174, + "grad_norm": 1.3885552883148193, + "learning_rate": 2.9867572765185192e-05, + "loss": 0.943, + "step": 55 + }, + { + "epoch": 0.3022670025188917, + "grad_norm": 1.6882840394973755, + "learning_rate": 2.9806778127739467e-05, + "loss": 0.885, + "step": 60 + }, + { + "epoch": 0.327455919395466, + "grad_norm": 1.9132777452468872, + "learning_rate": 2.9734620139118812e-05, + "loss": 0.7697, + "step": 65 + }, + { + "epoch": 0.3526448362720403, + "grad_norm": 1.751017689704895, + "learning_rate": 2.965115417643212e-05, + "loss": 0.7259, + "step": 70 + }, + { + "epoch": 0.3778337531486146, + "grad_norm": 1.8772876262664795, + "learning_rate": 2.9556444295008444e-05, + "loss": 0.6708, + "step": 75 + }, + { + "epoch": 0.40302267002518893, + "grad_norm": 1.5127477645874023, + "learning_rate": 2.9450563179238207e-05, + "loss": 0.6525, + "step": 80 + }, + { + "epoch": 0.4282115869017632, + "grad_norm": 1.8577784299850464, + "learning_rate": 2.9333592086792113e-05, + "loss": 0.6055, + "step": 85 + }, + { + "epoch": 0.4534005037783375, + "grad_norm": 1.7677193880081177, + "learning_rate": 2.920562078626055e-05, + "loss": 0.5526, + "step": 90 + }, + { + "epoch": 0.47858942065491183, + "grad_norm": 1.7497684955596924, + "learning_rate": 2.9066747488261378e-05, + "loss": 0.4877, + "step": 95 + }, + { + "epoch": 0.5037783375314862, + "grad_norm": 1.9154943227767944, + "learning_rate": 2.8917078770068882e-05, + "loss": 0.4572, + "step": 100 + }, + { + "epoch": 0.5289672544080605, + "grad_norm": 1.5672165155410767, + "learning_rate": 2.8756729493821883e-05, + "loss": 0.4311, + "step": 105 + }, + { + "epoch": 0.5541561712846348, + "grad_norm": 2.033596992492676, + "learning_rate": 2.8585822718373623e-05, + "loss": 0.4238, + "step": 110 + }, + { + "epoch": 0.5793450881612091, + "grad_norm": 1.5673311948776245, + "learning_rate": 2.8404489604851186e-05, + "loss": 0.4309, + "step": 115 + }, + { + "epoch": 0.6045340050377834, + "grad_norm": 1.5457563400268555, + "learning_rate": 2.821286931599684e-05, + "loss": 0.3808, + "step": 120 + }, + { + "epoch": 0.6297229219143576, + "grad_norm": 1.6764178276062012, + "learning_rate": 2.801110890936867e-05, + "loss": 0.3655, + "step": 125 + }, + { + "epoch": 0.654911838790932, + "grad_norm": 1.6413410902023315, + "learning_rate": 2.7799363224482334e-05, + "loss": 0.3308, + "step": 130 + }, + { + "epoch": 0.6801007556675063, + "grad_norm": 1.8217614889144897, + "learning_rate": 2.7577794763980634e-05, + "loss": 0.3258, + "step": 135 + }, + { + "epoch": 0.7052896725440806, + "grad_norm": 1.7405568361282349, + "learning_rate": 2.734657356892208e-05, + "loss": 0.3029, + "step": 140 + }, + { + "epoch": 0.7304785894206549, + "grad_norm": 1.4377340078353882, + "learning_rate": 2.710587708828414e-05, + "loss": 0.2848, + "step": 145 + }, + { + "epoch": 0.7556675062972292, + "grad_norm": 1.518257737159729, + "learning_rate": 2.685589004278139e-05, + "loss": 0.3112, + "step": 150 + }, + { + "epoch": 0.7808564231738035, + "grad_norm": 1.3453420400619507, + "learning_rate": 2.6596804283102928e-05, + "loss": 0.2457, + "step": 155 + }, + { + "epoch": 0.8060453400503779, + "grad_norm": 1.4524482488632202, + "learning_rate": 2.6328818642678026e-05, + "loss": 0.2273, + "step": 160 + }, + { + "epoch": 0.8312342569269522, + "grad_norm": 1.5285913944244385, + "learning_rate": 2.6052138785082897e-05, + "loss": 0.2293, + "step": 165 + }, + { + "epoch": 0.8564231738035264, + "grad_norm": 1.3759493827819824, + "learning_rate": 2.5766977046205735e-05, + "loss": 0.229, + "step": 170 + }, + { + "epoch": 0.8816120906801007, + "grad_norm": 1.2091211080551147, + "learning_rate": 2.5473552271291092e-05, + "loss": 0.2691, + "step": 175 + }, + { + "epoch": 0.906801007556675, + "grad_norm": 1.530138611793518, + "learning_rate": 2.5172089646988765e-05, + "loss": 0.2684, + "step": 180 + }, + { + "epoch": 0.9319899244332494, + "grad_norm": 1.5649440288543701, + "learning_rate": 2.4862820528535955e-05, + "loss": 0.2525, + "step": 185 + }, + { + "epoch": 0.9571788413098237, + "grad_norm": 1.4191148281097412, + "learning_rate": 2.4545982262205455e-05, + "loss": 0.2352, + "step": 190 + }, + { + "epoch": 0.982367758186398, + "grad_norm": 1.3743516206741333, + "learning_rate": 2.422181800315599e-05, + "loss": 0.2068, + "step": 195 + }, + { + "epoch": 1.0050377833753148, + "grad_norm": 1.2304866313934326, + "learning_rate": 2.3890576528824637e-05, + "loss": 0.209, + "step": 200 + }, + { + "epoch": 1.0302267002518892, + "grad_norm": 0.9995700120925903, + "learning_rate": 2.3552512048004428e-05, + "loss": 0.1703, + "step": 205 + }, + { + "epoch": 1.0554156171284634, + "grad_norm": 0.9197123646736145, + "learning_rate": 2.3207884005753707e-05, + "loss": 0.1516, + "step": 210 + }, + { + "epoch": 1.0806045340050379, + "grad_norm": 0.8599398136138916, + "learning_rate": 2.2856956884286986e-05, + "loss": 0.1626, + "step": 215 + }, + { + "epoch": 1.105793450881612, + "grad_norm": 1.0228945016860962, + "learning_rate": 2.25e-05, + "loss": 0.1521, + "step": 220 + }, + { + "epoch": 1.1309823677581865, + "grad_norm": 1.0545369386672974, + "learning_rate": 2.213728729678491e-05, + "loss": 0.1855, + "step": 225 + }, + { + "epoch": 1.1561712846347607, + "grad_norm": 0.9270104765892029, + "learning_rate": 2.1769097135794052e-05, + "loss": 0.1557, + "step": 230 + }, + { + "epoch": 1.181360201511335, + "grad_norm": 1.1454310417175293, + "learning_rate": 2.139571208181381e-05, + "loss": 0.1862, + "step": 235 + }, + { + "epoch": 1.2065491183879093, + "grad_norm": 0.9651398062705994, + "learning_rate": 2.101741868641233e-05, + "loss": 0.1547, + "step": 240 + }, + { + "epoch": 1.2317380352644836, + "grad_norm": 0.956687331199646, + "learning_rate": 2.0634507268027702e-05, + "loss": 0.161, + "step": 245 + }, + { + "epoch": 1.256926952141058, + "grad_norm": 1.0909441709518433, + "learning_rate": 2.0247271689165226e-05, + "loss": 0.1619, + "step": 250 + }, + { + "epoch": 1.2821158690176322, + "grad_norm": 1.0591400861740112, + "learning_rate": 1.985600913087482e-05, + "loss": 0.1632, + "step": 255 + }, + { + "epoch": 1.3073047858942066, + "grad_norm": 0.9082454442977905, + "learning_rate": 1.946101986468167e-05, + "loss": 0.1564, + "step": 260 + }, + { + "epoch": 1.3324937027707808, + "grad_norm": 0.8732438683509827, + "learning_rate": 1.906260702214508e-05, + "loss": 0.1541, + "step": 265 + }, + { + "epoch": 1.3576826196473553, + "grad_norm": 0.807729959487915, + "learning_rate": 1.866107636222242e-05, + "loss": 0.1375, + "step": 270 + }, + { + "epoch": 1.3828715365239295, + "grad_norm": 0.944558322429657, + "learning_rate": 1.82567360366167e-05, + "loss": 0.154, + "step": 275 + }, + { + "epoch": 1.4080604534005037, + "grad_norm": 0.9061353802680969, + "learning_rate": 1.7849896353287853e-05, + "loss": 0.137, + "step": 280 + }, + { + "epoch": 1.433249370277078, + "grad_norm": 1.0508615970611572, + "learning_rate": 1.744086953830922e-05, + "loss": 0.1492, + "step": 285 + }, + { + "epoch": 1.4584382871536523, + "grad_norm": 0.7615114450454712, + "learning_rate": 1.702996949625197e-05, + "loss": 0.1335, + "step": 290 + }, + { + "epoch": 1.4836272040302267, + "grad_norm": 1.1557296514511108, + "learning_rate": 1.6617511569281382e-05, + "loss": 0.1315, + "step": 295 + }, + { + "epoch": 1.508816120906801, + "grad_norm": 0.8622949123382568, + "learning_rate": 1.6203812295149876e-05, + "loss": 0.138, + "step": 300 + }, + { + "epoch": 1.5340050377833752, + "grad_norm": 0.9731374382972717, + "learning_rate": 1.5789189164272456e-05, + "loss": 0.1411, + "step": 305 + }, + { + "epoch": 1.5591939546599496, + "grad_norm": 0.7911257147789001, + "learning_rate": 1.5373960376071095e-05, + "loss": 0.1289, + "step": 310 + }, + { + "epoch": 1.584382871536524, + "grad_norm": 0.6322313547134399, + "learning_rate": 1.495844459477494e-05, + "loss": 0.1209, + "step": 315 + }, + { + "epoch": 1.6095717884130982, + "grad_norm": 0.7810544371604919, + "learning_rate": 1.4542960704863842e-05, + "loss": 0.1251, + "step": 320 + }, + { + "epoch": 1.6347607052896724, + "grad_norm": 1.0751497745513916, + "learning_rate": 1.4127827566342864e-05, + "loss": 0.125, + "step": 325 + }, + { + "epoch": 1.6599496221662469, + "grad_norm": 1.0193102359771729, + "learning_rate": 1.371336377003551e-05, + "loss": 0.128, + "step": 330 + }, + { + "epoch": 1.6851385390428213, + "grad_norm": 0.9494513869285583, + "learning_rate": 1.3299887393083629e-05, + "loss": 0.1234, + "step": 335 + }, + { + "epoch": 1.7103274559193955, + "grad_norm": 0.780321478843689, + "learning_rate": 1.288771575484145e-05, + "loss": 0.1251, + "step": 340 + }, + { + "epoch": 1.7355163727959697, + "grad_norm": 0.7068678140640259, + "learning_rate": 1.2477165173351256e-05, + "loss": 0.1175, + "step": 345 + }, + { + "epoch": 1.760705289672544, + "grad_norm": 0.7080034017562866, + "learning_rate": 1.206855072258742e-05, + "loss": 0.1229, + "step": 350 + }, + { + "epoch": 1.7858942065491183, + "grad_norm": 0.6469352841377258, + "learning_rate": 1.1662185990655285e-05, + "loss": 0.1207, + "step": 355 + }, + { + "epoch": 1.8110831234256928, + "grad_norm": 0.9020638465881348, + "learning_rate": 1.1258382839130282e-05, + "loss": 0.124, + "step": 360 + }, + { + "epoch": 1.836272040302267, + "grad_norm": 0.5834746360778809, + "learning_rate": 1.0857451163722119e-05, + "loss": 0.1099, + "step": 365 + }, + { + "epoch": 1.8614609571788412, + "grad_norm": 0.8812537789344788, + "learning_rate": 1.0459698656447612e-05, + "loss": 0.124, + "step": 370 + }, + { + "epoch": 1.8866498740554156, + "grad_norm": 0.5998390913009644, + "learning_rate": 1.0065430569494785e-05, + "loss": 0.1254, + "step": 375 + }, + { + "epoch": 1.91183879093199, + "grad_norm": 0.6841883063316345, + "learning_rate": 9.67494948095931e-06, + "loss": 0.1147, + "step": 380 + }, + { + "epoch": 1.9370277078085643, + "grad_norm": 0.7426761984825134, + "learning_rate": 9.288555062633258e-06, + "loss": 0.1101, + "step": 385 + }, + { + "epoch": 1.9622166246851385, + "grad_norm": 0.7562117576599121, + "learning_rate": 8.906543850024186e-06, + "loss": 0.1132, + "step": 390 + }, + { + "epoch": 1.987405541561713, + "grad_norm": 0.5789086222648621, + "learning_rate": 8.529209014781202e-06, + "loss": 0.1071, + "step": 395 + }, + { + "epoch": 2.0100755667506296, + "grad_norm": 0.5138999819755554, + "learning_rate": 8.156840139702554e-06, + "loss": 0.1057, + "step": 400 + }, + { + "epoch": 2.0352644836272042, + "grad_norm": 0.4506531357765198, + "learning_rate": 7.789722996497514e-06, + "loss": 0.1083, + "step": 405 + }, + { + "epoch": 2.0604534005037785, + "grad_norm": 0.4892246723175049, + "learning_rate": 7.4281393264729584e-06, + "loss": 0.1053, + "step": 410 + }, + { + "epoch": 2.0856423173803527, + "grad_norm": 0.5806070566177368, + "learning_rate": 7.072366624313169e-06, + "loss": 0.1089, + "step": 415 + }, + { + "epoch": 2.110831234256927, + "grad_norm": 0.521141529083252, + "learning_rate": 6.722677925118561e-06, + "loss": 0.0981, + "step": 420 + }, + { + "epoch": 2.136020151133501, + "grad_norm": 0.3723362982273102, + "learning_rate": 6.379341594866983e-06, + "loss": 0.1016, + "step": 425 + }, + { + "epoch": 2.1612090680100757, + "grad_norm": 0.5389318466186523, + "learning_rate": 6.0426211244582105e-06, + "loss": 0.1019, + "step": 430 + }, + { + "epoch": 2.18639798488665, + "grad_norm": 0.44918715953826904, + "learning_rate": 5.712774927499851e-06, + "loss": 0.1075, + "step": 435 + }, + { + "epoch": 2.211586901763224, + "grad_norm": 0.3776228129863739, + "learning_rate": 5.390056141989745e-06, + "loss": 0.1099, + "step": 440 + }, + { + "epoch": 2.2367758186397984, + "grad_norm": 0.5162299871444702, + "learning_rate": 5.0747124360471125e-06, + "loss": 0.1034, + "step": 445 + }, + { + "epoch": 2.261964735516373, + "grad_norm": 0.4922664761543274, + "learning_rate": 4.766985817841482e-06, + "loss": 0.1014, + "step": 450 + }, + { + "epoch": 2.287153652392947, + "grad_norm": 0.6048965454101562, + "learning_rate": 4.4671124498653624e-06, + "loss": 0.098, + "step": 455 + }, + { + "epoch": 2.3123425692695214, + "grad_norm": 0.46646395325660706, + "learning_rate": 4.175322467693068e-06, + "loss": 0.0983, + "step": 460 + }, + { + "epoch": 2.3375314861460956, + "grad_norm": 0.4608611762523651, + "learning_rate": 3.891839803364934e-06, + "loss": 0.1054, + "step": 465 + }, + { + "epoch": 2.36272040302267, + "grad_norm": 0.47166043519973755, + "learning_rate": 3.6168820135322987e-06, + "loss": 0.1061, + "step": 470 + }, + { + "epoch": 2.3879093198992445, + "grad_norm": 0.5191495418548584, + "learning_rate": 3.3506601124953246e-06, + "loss": 0.0961, + "step": 475 + }, + { + "epoch": 2.4130982367758187, + "grad_norm": 0.471501886844635, + "learning_rate": 3.0933784102616147e-06, + "loss": 0.0989, + "step": 480 + }, + { + "epoch": 2.438287153652393, + "grad_norm": 0.4370369017124176, + "learning_rate": 2.845234355750051e-06, + "loss": 0.1043, + "step": 485 + }, + { + "epoch": 2.463476070528967, + "grad_norm": 0.46079349517822266, + "learning_rate": 2.60641838526008e-06, + "loss": 0.094, + "step": 490 + }, + { + "epoch": 2.4886649874055413, + "grad_norm": 0.5423371195793152, + "learning_rate": 2.3771137763228014e-06, + "loss": 0.1013, + "step": 495 + }, + { + "epoch": 2.513853904282116, + "grad_norm": 0.38755887746810913, + "learning_rate": 2.1574965070460047e-06, + "loss": 0.093, + "step": 500 + }, + { + "epoch": 2.53904282115869, + "grad_norm": 0.4985490143299103, + "learning_rate": 1.947735121061088e-06, + "loss": 0.0896, + "step": 505 + }, + { + "epoch": 2.5642317380352644, + "grad_norm": 0.4004926085472107, + "learning_rate": 1.7479905981754917e-06, + "loss": 0.0961, + "step": 510 + }, + { + "epoch": 2.589420654911839, + "grad_norm": 0.39716675877571106, + "learning_rate": 1.5584162308299675e-06, + "loss": 0.1011, + "step": 515 + }, + { + "epoch": 2.6146095717884132, + "grad_norm": 0.309821218252182, + "learning_rate": 1.3791575064554262e-06, + "loss": 0.0969, + "step": 520 + }, + { + "epoch": 2.6397984886649875, + "grad_norm": 0.46527448296546936, + "learning_rate": 1.2103519958197084e-06, + "loss": 0.1013, + "step": 525 + }, + { + "epoch": 2.6649874055415617, + "grad_norm": 0.5214188694953918, + "learning_rate": 1.052129247449915e-06, + "loss": 0.0912, + "step": 530 + }, + { + "epoch": 2.690176322418136, + "grad_norm": 0.4405462443828583, + "learning_rate": 9.046106882113753e-07, + "loss": 0.107, + "step": 535 + }, + { + "epoch": 2.7153652392947105, + "grad_norm": 0.34990841150283813, + "learning_rate": 7.679095301194849e-07, + "loss": 0.0978, + "step": 540 + }, + { + "epoch": 2.7405541561712847, + "grad_norm": 0.38584640622138977, + "learning_rate": 6.421306834560126e-07, + "loss": 0.095, + "step": 545 + }, + { + "epoch": 2.765743073047859, + "grad_norm": 0.3733978569507599, + "learning_rate": 5.273706762564761e-07, + "loss": 0.0968, + "step": 550 + }, + { + "epoch": 2.790931989924433, + "grad_norm": 0.4254690706729889, + "learning_rate": 4.2371758023042604e-07, + "loss": 0.0991, + "step": 555 + }, + { + "epoch": 2.8161209068010074, + "grad_norm": 0.6424248218536377, + "learning_rate": 3.312509431714661e-07, + "loss": 0.0982, + "step": 560 + }, + { + "epoch": 2.841309823677582, + "grad_norm": 0.40730053186416626, + "learning_rate": 2.50041727908909e-07, + "loss": 0.0971, + "step": 565 + }, + { + "epoch": 2.866498740554156, + "grad_norm": 0.4304647445678711, + "learning_rate": 1.8015225784786483e-07, + "loss": 0.0927, + "step": 570 + }, + { + "epoch": 2.8916876574307304, + "grad_norm": 0.4579257369041443, + "learning_rate": 1.2163616913962395e-07, + "loss": 0.0966, + "step": 575 + }, + { + "epoch": 2.9168765743073046, + "grad_norm": 0.3719714283943176, + "learning_rate": 7.453836951897885e-08, + "loss": 0.1019, + "step": 580 + }, + { + "epoch": 2.942065491183879, + "grad_norm": 0.37047651410102844, + "learning_rate": 3.889500384013755e-08, + "loss": 0.0992, + "step": 585 + }, + { + "epoch": 2.9672544080604535, + "grad_norm": 0.3919045925140381, + "learning_rate": 1.4733426337610877e-08, + "loss": 0.0951, + "step": 590 + }, + { + "epoch": 2.9924433249370277, + "grad_norm": 0.37301090359687805, + "learning_rate": 2.0721796334149945e-09, + "loss": 0.0876, + "step": 595 + }, + { + "epoch": 3.0, + "step": 597, + "total_flos": 1.760462524949463e+17, + "train_loss": 0.30572376009607233, + "train_runtime": 399.9587, + "train_samples_per_second": 47.615, + "train_steps_per_second": 1.493 + } + ], + "logging_steps": 5, + "max_steps": 597, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.760462524949463e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fec35eae064015a84e7161091c7fc513d92e5e59 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/6_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5580465f14a6cc2c066ddd3e733273afbb183d328381c33e672e647201dd5ff +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..72b059673b68ff1eb7ac8e63071319c8e69ed5c8 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 7_128_e3_3e-5 + results: [] +--- + + + +# 7_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b18285eaf806f428befabff07667ed0a0c03dc59 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "gate_proj", + "k_proj", + "v_proj", + "down_proj", + "up_proj", + "o_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cf5ab1c269a2c0febd32512c25786c622ab1936 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ff22bfb8407d49949160c2459cc117bdc56fac67a58e4abdf9d762015c9c1f +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e39beebd07e919998c44893d6a597d97284251b --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.682776023545938e+17, + "train_loss": 0.3122998087193428, + "train_runtime": 391.7915, + "train_samples": 6319, + "train_samples_per_second": 48.385, + "train_steps_per_second": 1.516 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e39beebd07e919998c44893d6a597d97284251b --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 1.682776023545938e+17, + "train_loss": 0.3122998087193428, + "train_runtime": 391.7915, + "train_samples": 6319, + "train_samples_per_second": 48.385, + "train_steps_per_second": 1.516 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..396cbd98ffa5bae93b5038e3d1ab24ebd10281ac --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/trainer_state.json @@ -0,0 +1,869 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 594, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02531645569620253, + "grad_norm": 2.1695375442504883, + "learning_rate": 4e-06, + "loss": 1.8585, + "step": 5 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.6166181564331055, + "learning_rate": 9e-06, + "loss": 1.8336, + "step": 10 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.3863964080810547, + "learning_rate": 1.4e-05, + "loss": 1.6931, + "step": 15 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.567740797996521, + "learning_rate": 1.9e-05, + "loss": 1.631, + "step": 20 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.367470622062683, + "learning_rate": 2.4e-05, + "loss": 1.5411, + "step": 25 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.3025368452072144, + "learning_rate": 2.9e-05, + "loss": 1.3698, + "step": 30 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.1819227933883667, + "learning_rate": 2.9996276899008885e-05, + "loss": 1.3154, + "step": 35 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.3402793407440186, + "learning_rate": 2.9981154968741788e-05, + "loss": 1.184, + "step": 40 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.4258559942245483, + "learning_rate": 2.9954413235354147e-05, + "loss": 1.1219, + "step": 45 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.4379665851593018, + "learning_rate": 2.9916072440482896e-05, + "loss": 1.0473, + "step": 50 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.7157204151153564, + "learning_rate": 2.9866162322321703e-05, + "loss": 0.9536, + "step": 55 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.5242365598678589, + "learning_rate": 2.980472159255521e-05, + "loss": 0.8996, + "step": 60 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 1.6075643301010132, + "learning_rate": 2.973179790633317e-05, + "loss": 0.8451, + "step": 65 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.6495287418365479, + "learning_rate": 2.964744782530777e-05, + "loss": 0.784, + "step": 70 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.6084622144699097, + "learning_rate": 2.955173677376284e-05, + "loss": 0.6811, + "step": 75 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 1.9418607950210571, + "learning_rate": 2.9444738987868933e-05, + "loss": 0.6957, + "step": 80 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.871692180633545, + "learning_rate": 2.9326537458103687e-05, + "loss": 0.5759, + "step": 85 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.704154372215271, + "learning_rate": 2.9197223864882085e-05, + "loss": 0.5422, + "step": 90 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.7558987140655518, + "learning_rate": 2.9056898507446553e-05, + "loss": 0.5104, + "step": 95 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.5978717803955078, + "learning_rate": 2.890567022607206e-05, + "loss": 0.5001, + "step": 100 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 1.5627464056015015, + "learning_rate": 2.8743656317646575e-05, + "loss": 0.4473, + "step": 105 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.8181276321411133, + "learning_rate": 2.8570982444692272e-05, + "loss": 0.4435, + "step": 110 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.6764742136001587, + "learning_rate": 2.838778253789822e-05, + "loss": 0.3799, + "step": 115 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 1.9999672174453735, + "learning_rate": 2.8194198692239936e-05, + "loss": 0.3584, + "step": 120 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.6848214864730835, + "learning_rate": 2.7990381056766583e-05, + "loss": 0.3898, + "step": 125 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 1.7781054973602295, + "learning_rate": 2.777648771814114e-05, + "loss": 0.3299, + "step": 130 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.22605299949646, + "learning_rate": 2.7552684578024e-05, + "loss": 0.3486, + "step": 135 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.6218857765197754, + "learning_rate": 2.7319145224394925e-05, + "loss": 0.3053, + "step": 140 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.5747209787368774, + "learning_rate": 2.7076050796913445e-05, + "loss": 0.3002, + "step": 145 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 1.6384692192077637, + "learning_rate": 2.6823589846421784e-05, + "loss": 0.3344, + "step": 150 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 2.003324270248413, + "learning_rate": 2.6561958188699604e-05, + "loss": 0.2682, + "step": 155 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 1.789661169052124, + "learning_rate": 2.6291358752583768e-05, + "loss": 0.2772, + "step": 160 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 1.4945005178451538, + "learning_rate": 2.6012001422571097e-05, + "loss": 0.2654, + "step": 165 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 1.6609457731246948, + "learning_rate": 2.5724102876026033e-05, + "loss": 0.2511, + "step": 170 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 1.455230712890625, + "learning_rate": 2.5427886415119635e-05, + "loss": 0.2624, + "step": 175 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 1.4874557256698608, + "learning_rate": 2.5123581793630172e-05, + "loss": 0.2269, + "step": 180 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 1.3684391975402832, + "learning_rate": 2.4811425038739634e-05, + "loss": 0.1918, + "step": 185 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 1.5387717485427856, + "learning_rate": 2.449165826796448e-05, + "loss": 0.2111, + "step": 190 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 1.5798405408859253, + "learning_rate": 2.416452950136248e-05, + "loss": 0.2189, + "step": 195 + }, + { + "epoch": 1.010126582278481, + "grad_norm": 1.1537739038467407, + "learning_rate": 2.3830292469161442e-05, + "loss": 0.1882, + "step": 200 + }, + { + "epoch": 1.0354430379746835, + "grad_norm": 1.4097929000854492, + "learning_rate": 2.348920641495893e-05, + "loss": 0.175, + "step": 205 + }, + { + "epoch": 1.0607594936708862, + "grad_norm": 1.253466010093689, + "learning_rate": 2.3141535894645677e-05, + "loss": 0.1625, + "step": 210 + }, + { + "epoch": 1.0860759493670886, + "grad_norm": 1.373793363571167, + "learning_rate": 2.278755057120863e-05, + "loss": 0.1755, + "step": 215 + }, + { + "epoch": 1.111392405063291, + "grad_norm": 1.0851860046386719, + "learning_rate": 2.2427525005572803e-05, + "loss": 0.1811, + "step": 220 + }, + { + "epoch": 1.1367088607594937, + "grad_norm": 0.9273868799209595, + "learning_rate": 2.2061738443644124e-05, + "loss": 0.1613, + "step": 225 + }, + { + "epoch": 1.1620253164556962, + "grad_norm": 1.3098623752593994, + "learning_rate": 2.1690474599718487e-05, + "loss": 0.1727, + "step": 230 + }, + { + "epoch": 1.1873417721518988, + "grad_norm": 1.1888713836669922, + "learning_rate": 2.1314021436425026e-05, + "loss": 0.1711, + "step": 235 + }, + { + "epoch": 1.2126582278481013, + "grad_norm": 0.9476783871650696, + "learning_rate": 2.0932670941374218e-05, + "loss": 0.1589, + "step": 240 + }, + { + "epoch": 1.2379746835443037, + "grad_norm": 1.036568522453308, + "learning_rate": 2.054671890068414e-05, + "loss": 0.1598, + "step": 245 + }, + { + "epoch": 1.2632911392405064, + "grad_norm": 1.0381852388381958, + "learning_rate": 2.015646466956045e-05, + "loss": 0.1543, + "step": 250 + }, + { + "epoch": 1.2886075949367088, + "grad_norm": 0.8129894137382507, + "learning_rate": 1.976221094010814e-05, + "loss": 0.1686, + "step": 255 + }, + { + "epoch": 1.3139240506329113, + "grad_norm": 0.8720009922981262, + "learning_rate": 1.9364263506555043e-05, + "loss": 0.1407, + "step": 260 + }, + { + "epoch": 1.339240506329114, + "grad_norm": 0.8847567439079285, + "learning_rate": 1.8962931028069292e-05, + "loss": 0.145, + "step": 265 + }, + { + "epoch": 1.3645569620253164, + "grad_norm": 0.940678060054779, + "learning_rate": 1.8558524789354606e-05, + "loss": 0.1631, + "step": 270 + }, + { + "epoch": 1.389873417721519, + "grad_norm": 0.8375731110572815, + "learning_rate": 1.8151358459209167e-05, + "loss": 0.1585, + "step": 275 + }, + { + "epoch": 1.4151898734177215, + "grad_norm": 0.8451452851295471, + "learning_rate": 1.7741747847235356e-05, + "loss": 0.1464, + "step": 280 + }, + { + "epoch": 1.4405063291139242, + "grad_norm": 0.9230212569236755, + "learning_rate": 1.7330010658888928e-05, + "loss": 0.1514, + "step": 285 + }, + { + "epoch": 1.4658227848101266, + "grad_norm": 1.3358652591705322, + "learning_rate": 1.69164662490578e-05, + "loss": 0.1367, + "step": 290 + }, + { + "epoch": 1.491139240506329, + "grad_norm": 0.7412362694740295, + "learning_rate": 1.6501435374361478e-05, + "loss": 0.1312, + "step": 295 + }, + { + "epoch": 1.5164556962025317, + "grad_norm": 1.0141371488571167, + "learning_rate": 1.6085239944363192e-05, + "loss": 0.1338, + "step": 300 + }, + { + "epoch": 1.5417721518987342, + "grad_norm": 1.0047032833099365, + "learning_rate": 1.5668202771887886e-05, + "loss": 0.1276, + "step": 305 + }, + { + "epoch": 1.5670886075949366, + "grad_norm": 0.7829155325889587, + "learning_rate": 1.5250647322639515e-05, + "loss": 0.1288, + "step": 310 + }, + { + "epoch": 1.5924050632911393, + "grad_norm": 0.7046462893486023, + "learning_rate": 1.4832897464312018e-05, + "loss": 0.1239, + "step": 315 + }, + { + "epoch": 1.6177215189873417, + "grad_norm": 0.8060404658317566, + "learning_rate": 1.4415277215388445e-05, + "loss": 0.14, + "step": 320 + }, + { + "epoch": 1.6430379746835442, + "grad_norm": 0.5923952460289001, + "learning_rate": 1.3998110493823178e-05, + "loss": 0.1248, + "step": 325 + }, + { + "epoch": 1.6683544303797468, + "grad_norm": 0.7237229943275452, + "learning_rate": 1.3581720865802071e-05, + "loss": 0.1214, + "step": 330 + }, + { + "epoch": 1.6936708860759495, + "grad_norm": 0.8891835808753967, + "learning_rate": 1.3166431294775487e-05, + "loss": 0.1332, + "step": 335 + }, + { + "epoch": 1.7189873417721517, + "grad_norm": 0.8607147336006165, + "learning_rate": 1.2752563890958778e-05, + "loss": 0.1195, + "step": 340 + }, + { + "epoch": 1.7443037974683544, + "grad_norm": 0.8323397636413574, + "learning_rate": 1.234043966149462e-05, + "loss": 0.1265, + "step": 345 + }, + { + "epoch": 1.769620253164557, + "grad_norm": 0.5599714517593384, + "learning_rate": 1.1930378261470858e-05, + "loss": 0.1212, + "step": 350 + }, + { + "epoch": 1.7949367088607595, + "grad_norm": 0.5881018042564392, + "learning_rate": 1.1522697745987076e-05, + "loss": 0.1122, + "step": 355 + }, + { + "epoch": 1.820253164556962, + "grad_norm": 0.5837050080299377, + "learning_rate": 1.1117714323462188e-05, + "loss": 0.1223, + "step": 360 + }, + { + "epoch": 1.8455696202531646, + "grad_norm": 0.5853524804115295, + "learning_rate": 1.0715742110374305e-05, + "loss": 0.122, + "step": 365 + }, + { + "epoch": 1.870886075949367, + "grad_norm": 0.4891911745071411, + "learning_rate": 1.0317092887623206e-05, + "loss": 0.1205, + "step": 370 + }, + { + "epoch": 1.8962025316455695, + "grad_norm": 0.47171810269355774, + "learning_rate": 9.922075858704368e-06, + "loss": 0.11, + "step": 375 + }, + { + "epoch": 1.9215189873417722, + "grad_norm": 0.6183779835700989, + "learning_rate": 9.53099740988206e-06, + "loss": 0.1145, + "step": 380 + }, + { + "epoch": 1.9468354430379748, + "grad_norm": 0.5974256992340088, + "learning_rate": 9.144160872547579e-06, + "loss": 0.1281, + "step": 385 + }, + { + "epoch": 1.972151898734177, + "grad_norm": 0.5413382649421692, + "learning_rate": 8.761866287946955e-06, + "loss": 0.1098, + "step": 390 + }, + { + "epoch": 1.9974683544303797, + "grad_norm": 0.6107265949249268, + "learning_rate": 8.384410174460525e-06, + "loss": 0.1095, + "step": 395 + }, + { + "epoch": 2.020253164556962, + "grad_norm": 0.5207856893539429, + "learning_rate": 8.012085297615027e-06, + "loss": 0.1118, + "step": 400 + }, + { + "epoch": 2.0455696202531644, + "grad_norm": 0.4686215817928314, + "learning_rate": 7.64518044300642e-06, + "loss": 0.1014, + "step": 405 + }, + { + "epoch": 2.070886075949367, + "grad_norm": 0.3977816104888916, + "learning_rate": 7.2839801923096975e-06, + "loss": 0.1058, + "step": 410 + }, + { + "epoch": 2.0962025316455697, + "grad_norm": 0.6009082794189453, + "learning_rate": 6.928764702549411e-06, + "loss": 0.1077, + "step": 415 + }, + { + "epoch": 2.1215189873417724, + "grad_norm": 0.42069849371910095, + "learning_rate": 6.579809488801994e-06, + "loss": 0.1071, + "step": 420 + }, + { + "epoch": 2.1468354430379746, + "grad_norm": 0.4195733964443207, + "learning_rate": 6.237385210498588e-06, + "loss": 0.1031, + "step": 425 + }, + { + "epoch": 2.1721518987341772, + "grad_norm": 0.592564582824707, + "learning_rate": 5.901757461493989e-06, + "loss": 0.0981, + "step": 430 + }, + { + "epoch": 2.19746835443038, + "grad_norm": 0.4252696633338928, + "learning_rate": 5.573186564064649e-06, + "loss": 0.1082, + "step": 435 + }, + { + "epoch": 2.222784810126582, + "grad_norm": 0.4405692219734192, + "learning_rate": 5.25192736699541e-06, + "loss": 0.1064, + "step": 440 + }, + { + "epoch": 2.248101265822785, + "grad_norm": 0.4721933603286743, + "learning_rate": 4.938229047911652e-06, + "loss": 0.1043, + "step": 445 + }, + { + "epoch": 2.2734177215189875, + "grad_norm": 0.5156137347221375, + "learning_rate": 4.6323349200101535e-06, + "loss": 0.101, + "step": 450 + }, + { + "epoch": 2.2987341772151897, + "grad_norm": 0.43573084473609924, + "learning_rate": 4.334482243338589e-06, + "loss": 0.1037, + "step": 455 + }, + { + "epoch": 2.3240506329113924, + "grad_norm": 0.4283562898635864, + "learning_rate": 4.044902040769963e-06, + "loss": 0.1092, + "step": 460 + }, + { + "epoch": 2.349367088607595, + "grad_norm": 0.5194533467292786, + "learning_rate": 3.7638189188148204e-06, + "loss": 0.1128, + "step": 465 + }, + { + "epoch": 2.3746835443037977, + "grad_norm": 0.45542803406715393, + "learning_rate": 3.491450893410134e-06, + "loss": 0.097, + "step": 470 + }, + { + "epoch": 2.4, + "grad_norm": 0.32038259506225586, + "learning_rate": 3.2280092208200853e-06, + "loss": 0.0987, + "step": 475 + }, + { + "epoch": 2.4253164556962026, + "grad_norm": 0.44475293159484863, + "learning_rate": 2.9736982337797335e-06, + "loss": 0.1054, + "step": 480 + }, + { + "epoch": 2.4506329113924052, + "grad_norm": 0.49000054597854614, + "learning_rate": 2.728715183008864e-06, + "loss": 0.1014, + "step": 485 + }, + { + "epoch": 2.4759493670886075, + "grad_norm": 0.42747339606285095, + "learning_rate": 2.4932500842187955e-06, + "loss": 0.096, + "step": 490 + }, + { + "epoch": 2.50126582278481, + "grad_norm": 0.3527078330516815, + "learning_rate": 2.267485570730894e-06, + "loss": 0.1001, + "step": 495 + }, + { + "epoch": 2.526582278481013, + "grad_norm": 0.39417704939842224, + "learning_rate": 2.0515967518210254e-06, + "loss": 0.1063, + "step": 500 + }, + { + "epoch": 2.5518987341772155, + "grad_norm": 0.4018416106700897, + "learning_rate": 1.8457510768999276e-06, + "loss": 0.1009, + "step": 505 + }, + { + "epoch": 2.5772151898734177, + "grad_norm": 0.40117111802101135, + "learning_rate": 1.6501082056347488e-06, + "loss": 0.1073, + "step": 510 + }, + { + "epoch": 2.6025316455696204, + "grad_norm": 0.4131176173686981, + "learning_rate": 1.4648198841125453e-06, + "loss": 0.0973, + "step": 515 + }, + { + "epoch": 2.6278481012658226, + "grad_norm": 0.41961872577667236, + "learning_rate": 1.2900298271417592e-06, + "loss": 0.1038, + "step": 520 + }, + { + "epoch": 2.6531645569620252, + "grad_norm": 0.43403080105781555, + "learning_rate": 1.1258736067830016e-06, + "loss": 0.0956, + "step": 525 + }, + { + "epoch": 2.678481012658228, + "grad_norm": 0.41609829664230347, + "learning_rate": 9.724785471955566e-07, + "loss": 0.0968, + "step": 530 + }, + { + "epoch": 2.7037974683544306, + "grad_norm": 0.38761934638023376, + "learning_rate": 8.299636258812199e-07, + "loss": 0.1002, + "step": 535 + }, + { + "epoch": 2.729113924050633, + "grad_norm": 0.3125680088996887, + "learning_rate": 6.984393814019885e-07, + "loss": 0.0946, + "step": 540 + }, + { + "epoch": 2.7544303797468355, + "grad_norm": 0.3809020519256592, + "learning_rate": 5.780078276432865e-07, + "loss": 0.096, + "step": 545 + }, + { + "epoch": 2.779746835443038, + "grad_norm": 0.3504795730113983, + "learning_rate": 4.6876237468912007e-07, + "loss": 0.1033, + "step": 550 + }, + { + "epoch": 2.8050632911392404, + "grad_norm": 0.44968825578689575, + "learning_rate": 3.707877563706158e-07, + "loss": 0.1004, + "step": 555 + }, + { + "epoch": 2.830379746835443, + "grad_norm": 0.47015494108200073, + "learning_rate": 2.8415996454407287e-07, + "loss": 0.1058, + "step": 560 + }, + { + "epoch": 2.8556962025316457, + "grad_norm": 0.3551529049873352, + "learning_rate": 2.089461901495715e-07, + "loss": 0.0977, + "step": 565 + }, + { + "epoch": 2.8810126582278484, + "grad_norm": 0.3679092824459076, + "learning_rate": 1.4520477109578712e-07, + "loss": 0.0985, + "step": 570 + }, + { + "epoch": 2.9063291139240506, + "grad_norm": 0.4344881474971771, + "learning_rate": 9.298514701147898e-08, + "loss": 0.0982, + "step": 575 + }, + { + "epoch": 2.9316455696202532, + "grad_norm": 0.3154670000076294, + "learning_rate": 5.232782089872601e-08, + "loss": 0.0929, + "step": 580 + }, + { + "epoch": 2.9569620253164555, + "grad_norm": 0.3995712697505951, + "learning_rate": 2.3264327717674728e-08, + "loss": 0.1039, + "step": 585 + }, + { + "epoch": 2.982278481012658, + "grad_norm": 0.36005130410194397, + "learning_rate": 5.817209927129752e-09, + "loss": 0.0997, + "step": 590 + }, + { + "epoch": 3.0, + "step": 594, + "total_flos": 1.682776023545938e+17, + "train_loss": 0.3122998087193428, + "train_runtime": 391.7915, + "train_samples_per_second": 48.385, + "train_steps_per_second": 1.516 + } + ], + "logging_steps": 5, + "max_steps": 594, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.682776023545938e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ab8a387d4af06765f3fe90bd3afd8b5c60ffeb8 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/7_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817df4ca213ecddc520df9d7f1f94abd875c3329b299d6de988dea442c274800 +size 8337 diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/README.md b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e6eefeadbb054945606e26ec07757228af0eae83 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/README.md @@ -0,0 +1,63 @@ +--- +library_name: peft +license: llama3.1 +base_model: meta-llama/Llama-3.1-8B-Instruct +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- data/barexam_qa/train/processed/knowledge_117 +model-index: +- name: 8_128_e3_3e-5 + results: [] +--- + + + +# 8_128_e3_3e-5 + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the data/barexam_qa/train/processed/knowledge_117 dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 3e-05 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 32 +- total_eval_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.05 +- num_epochs: 3.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.2 +- Transformers 4.52.4 +- Pytorch 2.7.0+cu126 +- Datasets 3.6.0 +- Tokenizers 0.21.2 \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/adapter_config.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6ad7b98bfa62ae610824f00de0bbef6c4a91fb9 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 256, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 128, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj", + "down_proj", + "up_proj", + "o_proj", + "k_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/adapter_model.safetensors b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b143ca24c3607b54d39b342cf686ff771d80076 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff3b53c303daf3b9813a615e90cc932df8471ab5031afbebe155ca2423e397b +size 671150064 diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/all_results.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5431b43b88a45460d338c7eadf59d52d1e4f1688 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 2.983220060618752e+16, + "train_loss": 0.44147669564011277, + "train_runtime": 65.7749, + "train_samples": 972, + "train_samples_per_second": 44.333, + "train_steps_per_second": 1.414 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/chat_template.jinja b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/config.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ce53acf389baaeb67165e24e9c851a84aaaec95 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/special_tokens_map.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..ca9da6d1141adb2d968d869bf31b68250eac3c0f --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|reserved_special_token_247|>" +} diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6ac83ff885a7c13c549808db25dd230af9d67648 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a7e7c676f7be2e5d2e8449c508be9b00e5b18c5b3c39ebc626e96b3f4b988 +size 17210019 diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer_config.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccb376b77a2b1a8c28d82c214f57c8e8ef9dccd5 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|reserved_special_token_247|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/train_results.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5431b43b88a45460d338c7eadf59d52d1e4f1688 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 2.983220060618752e+16, + "train_loss": 0.44147669564011277, + "train_runtime": 65.7749, + "train_samples": 972, + "train_samples_per_second": 44.333, + "train_steps_per_second": 1.414 +} \ No newline at end of file diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/trainer_state.json b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dcbd8562f0ec9057ded02ae01428c742f891e014 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/trainer_state.json @@ -0,0 +1,169 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 93, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.16393442622950818, + "grad_norm": 1.2280281782150269, + "learning_rate": 2.4e-05, + "loss": 1.778, + "step": 5 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 1.6359772682189941, + "learning_rate": 2.984732162821399e-05, + "loss": 1.473, + "step": 10 + }, + { + "epoch": 0.4918032786885246, + "grad_norm": 1.3154487609863281, + "learning_rate": 2.9232388752559797e-05, + "loss": 1.0596, + "step": 15 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 1.4620033502578735, + "learning_rate": 2.8165184843508835e-05, + "loss": 0.7568, + "step": 20 + }, + { + "epoch": 0.819672131147541, + "grad_norm": 1.2525016069412231, + "learning_rate": 2.6679623070746327e-05, + "loss": 0.6206, + "step": 25 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 1.3828377723693848, + "learning_rate": 2.482291100917928e-05, + "loss": 0.4209, + "step": 30 + }, + { + "epoch": 1.1311475409836065, + "grad_norm": 1.3472776412963867, + "learning_rate": 2.2654050495913498e-05, + "loss": 0.38, + "step": 35 + }, + { + "epoch": 1.2950819672131146, + "grad_norm": 1.1111876964569092, + "learning_rate": 2.0241962693986477e-05, + "loss": 0.2682, + "step": 40 + }, + { + "epoch": 1.459016393442623, + "grad_norm": 1.3165218830108643, + "learning_rate": 1.7663297943814555e-05, + "loss": 0.2254, + "step": 45 + }, + { + "epoch": 1.6229508196721312, + "grad_norm": 0.829387903213501, + "learning_rate": 1.5e-05, + "loss": 0.2028, + "step": 50 + }, + { + "epoch": 1.7868852459016393, + "grad_norm": 1.0182842016220093, + "learning_rate": 1.2336702056185454e-05, + "loss": 0.162, + "step": 55 + }, + { + "epoch": 1.9508196721311475, + "grad_norm": 0.8388665318489075, + "learning_rate": 9.758037306013527e-06, + "loss": 0.1466, + "step": 60 + }, + { + "epoch": 2.098360655737705, + "grad_norm": 0.7705045342445374, + "learning_rate": 7.345949504086509e-06, + "loss": 0.1265, + "step": 65 + }, + { + "epoch": 2.262295081967213, + "grad_norm": 0.6365391612052917, + "learning_rate": 5.177088990820725e-06, + "loss": 0.1172, + "step": 70 + }, + { + "epoch": 2.4262295081967213, + "grad_norm": 0.8721818327903748, + "learning_rate": 3.3203769292536767e-06, + "loss": 0.1107, + "step": 75 + }, + { + "epoch": 2.5901639344262293, + "grad_norm": 0.5932118892669678, + "learning_rate": 1.8348151564911653e-06, + "loss": 0.0994, + "step": 80 + }, + { + "epoch": 2.7540983606557377, + "grad_norm": 0.7568470239639282, + "learning_rate": 7.676112474402069e-07, + "loss": 0.1018, + "step": 85 + }, + { + "epoch": 2.918032786885246, + "grad_norm": 0.677406370639801, + "learning_rate": 1.5267837178600974e-07, + "loss": 0.1058, + "step": 90 + }, + { + "epoch": 3.0, + "step": 93, + "total_flos": 2.983220060618752e+16, + "train_loss": 0.44147669564011277, + "train_runtime": 65.7749, + "train_samples_per_second": 44.333, + "train_steps_per_second": 1.414 + } + ], + "logging_steps": 5, + "max_steps": 93, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 20000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.983220060618752e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/training_args.bin b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a57e30a463ebca97c5a00f391105438215fdb97 --- /dev/null +++ b/barexam_qa_train_knowledge_100_instruct/8_128_e3_3e-5/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608305c137ff40baf340a4b35b65585417bd64da730e312be733430b20a1adcc +size 8337