diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..5216b887614ffc0efffe9860d89660c82e2cab5d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,24 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-10000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-10500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-11000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-11500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-12000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-12500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-13000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-13500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-13911/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-4500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-5500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-6000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-6500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-7000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-7500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-8000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-8500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-9000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/checkpoint-9500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +sft/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/sft/README.md b/sft/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/adapter_config.json b/sft/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..897aff50c4ade73201937d923c166a794574f678 --- /dev/null +++ b/sft/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "gate_proj", + "q_proj", + "k_proj", + "down_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/adapter_model.safetensors b/sft/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..035f8dc9e20a825ab1d5dced50552e5a51cb8597 --- /dev/null +++ b/sft/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db9062ddd9124c71dc4444b0caef7c0e0891e8e5169d33b082970a973b6a5c9 +size 335604696 diff --git a/sft/checkpoint-10000/README.md b/sft/checkpoint-10000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-10000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-10000/adapter_config.json b/sft/checkpoint-10000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-10000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-10000/adapter_model.safetensors b/sft/checkpoint-10000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8aa7ceadc944383175b6b6422d6237da63b0b9c --- /dev/null +++ b/sft/checkpoint-10000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39bd85dfbc564459489ddda9bb640ec87ede6d922866c4eae1d442babcec7835 +size 335604696 diff --git a/sft/checkpoint-10000/optimizer.pt b/sft/checkpoint-10000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5939f64f125cc84fc5242cb9d06ceaa2c06c714f --- /dev/null +++ b/sft/checkpoint-10000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6667d82a73b83803272bfd30fc5aa1d212078b6d78d98bc34f7570188eea14a7 +size 671466706 diff --git a/sft/checkpoint-10000/rng_state_0.pth b/sft/checkpoint-10000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..043fa49e049fdb15cf63327aff00e45fb6e706be --- /dev/null +++ b/sft/checkpoint-10000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac2edec4e6e74254300214fc9e456d439974b8b2ef244f971c5925e27f63e04 +size 14512 diff --git a/sft/checkpoint-10000/rng_state_1.pth b/sft/checkpoint-10000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ecb0c8a1da6ed1eb4fc95099d16865af59e6b858 --- /dev/null +++ b/sft/checkpoint-10000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e1749a93c6ac1205c21c68b704372e141a714b309b40f2a8f8716891417c5f +size 14512 diff --git a/sft/checkpoint-10000/scaler.pt b/sft/checkpoint-10000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a69479a584c39d259e74922f2190abb28900dcd9 --- /dev/null +++ b/sft/checkpoint-10000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6404977d9b2bc59f927dcbe2e543d4d65c461d3382f1de939b633c052c6d17b1 +size 988 diff --git a/sft/checkpoint-10000/scheduler.pt b/sft/checkpoint-10000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..402b154da70155e6b8fb6bb1df45f1f186a60d53 --- /dev/null +++ b/sft/checkpoint-10000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade8a5f6a69ba3b6e94d4c7d3556d61ad117887af807141f1437d67be8a80c4c +size 1064 diff --git a/sft/checkpoint-10000/special_tokens_map.json b/sft/checkpoint-10000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-10000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-10000/tokenizer.json b/sft/checkpoint-10000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-10000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-10000/tokenizer_config.json b/sft/checkpoint-10000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-10000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-10000/trainer_state.json b/sft/checkpoint-10000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fd23c013b2b05764c164646279b96c5b8263bcf1 --- /dev/null +++ b/sft/checkpoint-10000/trainer_state.json @@ -0,0 +1,1434 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.1565836298932384, + "eval_steps": 500, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.475615094502536e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-10000/training_args.bin b/sft/checkpoint-10000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-10000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-10500/README.md b/sft/checkpoint-10500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-10500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-10500/adapter_config.json b/sft/checkpoint-10500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-10500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-10500/adapter_model.safetensors b/sft/checkpoint-10500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ab2a84c90c45dc4095f2e5ac86c608f1a4d4c48 --- /dev/null +++ b/sft/checkpoint-10500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a784959f15d4932339aaac14c0d201e6617c5a82d9103b4ec656abb240f8f59e +size 335604696 diff --git a/sft/checkpoint-10500/optimizer.pt b/sft/checkpoint-10500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5feb63352d8ade6490888c7d413f7dc5acae79fd --- /dev/null +++ b/sft/checkpoint-10500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2499f396c74c0bd30b3281772e89910892cef78d4e4d3c3f2dfb346271f8b524 +size 671466706 diff --git a/sft/checkpoint-10500/rng_state_0.pth b/sft/checkpoint-10500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..67e4b80cabb2b454cf6e5ee81497e9762d7c1e98 --- /dev/null +++ b/sft/checkpoint-10500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14b8930db43a3cc76c2340e8c425aa37f5c38b933212a6e74ef372e818d409f +size 14512 diff --git a/sft/checkpoint-10500/rng_state_1.pth b/sft/checkpoint-10500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d4b8c7df0200f18fc61d17a3069d55015ab245a0 --- /dev/null +++ b/sft/checkpoint-10500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2cad9277a3a565ffe85fe3f42ee50b1d385118dedd5081204a34ee3e2678db7 +size 14512 diff --git a/sft/checkpoint-10500/scaler.pt b/sft/checkpoint-10500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d446edb4b70049752b439a55cdd3e6bf5a246dd --- /dev/null +++ b/sft/checkpoint-10500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3e735694b44e300e68ca0a0ff23f1da02e4eb073ad54e5584fc33739f8fca62 +size 988 diff --git a/sft/checkpoint-10500/scheduler.pt b/sft/checkpoint-10500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bffa127a18b9ebd2660209af48fcab88c9d833d --- /dev/null +++ b/sft/checkpoint-10500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:077effc2f172abbf14dd5e54d2b1a9466354559467ecc374ed4d6754e3010e0b +size 1064 diff --git a/sft/checkpoint-10500/special_tokens_map.json b/sft/checkpoint-10500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-10500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-10500/tokenizer.json b/sft/checkpoint-10500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-10500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-10500/tokenizer_config.json b/sft/checkpoint-10500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-10500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-10500/trainer_state.json b/sft/checkpoint-10500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d68d46e6438a59821284c8d67992074815bd1140 --- /dev/null +++ b/sft/checkpoint-10500/trainer_state.json @@ -0,0 +1,1504 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2644235953844496, + "eval_steps": 500, + "global_step": 10500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.6994182296150475e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-10500/training_args.bin b/sft/checkpoint-10500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-10500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-11000/README.md b/sft/checkpoint-11000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-11000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-11000/adapter_config.json b/sft/checkpoint-11000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-11000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-11000/adapter_model.safetensors b/sft/checkpoint-11000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3558e176380189e9fb2c38d3af1df645919341c5 --- /dev/null +++ b/sft/checkpoint-11000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09575f19524c42ad2dead79488f5299645a8a2631f8d09fd87264e9f028f9034 +size 335604696 diff --git a/sft/checkpoint-11000/optimizer.pt b/sft/checkpoint-11000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbf84c28322b5e15d56ba3cfa84e244ac22233af --- /dev/null +++ b/sft/checkpoint-11000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992420e0e6c7d1e0a1d5ffbad95c4a3d55fb0e1cf555aacc263dcec2da7340d8 +size 671466706 diff --git a/sft/checkpoint-11000/rng_state_0.pth b/sft/checkpoint-11000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..61c2d03ef455d82b0449689d4dcef867d78e0c34 --- /dev/null +++ b/sft/checkpoint-11000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce55431de266524511d4dee81be46abbecaa4b38e41c5b867bf55239bfac5940 +size 14512 diff --git a/sft/checkpoint-11000/rng_state_1.pth b/sft/checkpoint-11000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c13419dd615d41adac34a50e85a08b8f7f47bd16 --- /dev/null +++ b/sft/checkpoint-11000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c167eea32e2660ce4ac1ef502ce3bddd025ae0aed99348c68b2f57bad0601e +size 14512 diff --git a/sft/checkpoint-11000/scaler.pt b/sft/checkpoint-11000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f20c464822e7c549ba9a130c8364fc98d47e3094 --- /dev/null +++ b/sft/checkpoint-11000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb1589a91eaa8e9d3c88c388246ec59f58c35c8ee4a54e99178b8418dcb662f +size 988 diff --git a/sft/checkpoint-11000/scheduler.pt b/sft/checkpoint-11000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b3c195bf1f977463da4a3b9306e4a8592fed647 --- /dev/null +++ b/sft/checkpoint-11000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cdb6318f3353605a08e6c965a7851c15b2866eee6da8c8c8fed4e707973c523 +size 1064 diff --git a/sft/checkpoint-11000/special_tokens_map.json b/sft/checkpoint-11000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-11000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-11000/tokenizer.json b/sft/checkpoint-11000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-11000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-11000/tokenizer_config.json b/sft/checkpoint-11000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-11000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-11000/trainer_state.json b/sft/checkpoint-11000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9fa2027b8e1f1a60be4d3377ba6c192d4ab3a9b2 --- /dev/null +++ b/sft/checkpoint-11000/trainer_state.json @@ -0,0 +1,1574 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.3722635608756604, + "eval_steps": 500, + "global_step": 11000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + }, + { + "epoch": 2.2752075919335706, + "grad_norm": 0.3063746690750122, + "learning_rate": 7.284299286856877e-06, + "loss": 0.8767, + "step": 10550 + }, + { + "epoch": 2.2859915884826916, + "grad_norm": 0.27114883065223694, + "learning_rate": 7.080152344204028e-06, + "loss": 0.8517, + "step": 10600 + }, + { + "epoch": 2.2967755850318126, + "grad_norm": 0.26992297172546387, + "learning_rate": 6.878433979915719e-06, + "loss": 0.873, + "step": 10650 + }, + { + "epoch": 2.307559581580934, + "grad_norm": 0.30842849612236023, + "learning_rate": 6.6791715317721075e-06, + "loss": 0.8645, + "step": 10700 + }, + { + "epoch": 2.318343578130055, + "grad_norm": 0.2740515172481537, + "learning_rate": 6.482392004716492e-06, + "loss": 0.8772, + "step": 10750 + }, + { + "epoch": 2.329127574679176, + "grad_norm": 0.28314441442489624, + "learning_rate": 6.288122067195592e-06, + "loss": 0.87, + "step": 10800 + }, + { + "epoch": 2.3399115712282974, + "grad_norm": 0.2951704263687134, + "learning_rate": 6.096388047545232e-06, + "loss": 0.8801, + "step": 10850 + }, + { + "epoch": 2.3506955677774184, + "grad_norm": 0.3134472966194153, + "learning_rate": 5.907215930422244e-06, + "loss": 0.8598, + "step": 10900 + }, + { + "epoch": 2.3614795643265394, + "grad_norm": 0.3114987313747406, + "learning_rate": 5.7206313532829095e-06, + "loss": 0.8578, + "step": 10950 + }, + { + "epoch": 2.3722635608756604, + "grad_norm": 0.3185006380081177, + "learning_rate": 5.5366596029084535e-06, + "loss": 0.8713, + "step": 11000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.923221364727559e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-11000/training_args.bin b/sft/checkpoint-11000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-11000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-11500/README.md b/sft/checkpoint-11500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-11500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-11500/adapter_config.json b/sft/checkpoint-11500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-11500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-11500/adapter_model.safetensors b/sft/checkpoint-11500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b243875f8f1f530a59b076ebbefc027696870515 --- /dev/null +++ b/sft/checkpoint-11500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa2ea18f39d164abb59fb69ad2e3374cd245195d4994b0cfb9775bede63a1cc0 +size 335604696 diff --git a/sft/checkpoint-11500/optimizer.pt b/sft/checkpoint-11500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2abd6596a604627f6841615d0459ce4e2cbd377e --- /dev/null +++ b/sft/checkpoint-11500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:171920138553d50f5fe516f0f4a0b2a251d425bf86d2d232e3d419c899a6b83c +size 671466706 diff --git a/sft/checkpoint-11500/rng_state_0.pth b/sft/checkpoint-11500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a865bb536dfdd8d37aba94130e51394e223267c5 --- /dev/null +++ b/sft/checkpoint-11500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50b48b9045377efa301c425190a6b4bb9bc454191805c0acf6e5e3e68ddeabd +size 14512 diff --git a/sft/checkpoint-11500/rng_state_1.pth b/sft/checkpoint-11500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3881faac412331f9161bb175ba5e92e308291325 --- /dev/null +++ b/sft/checkpoint-11500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7542157c44ce62f64c5081cc4c12bb2bc6aacc55b2f1f2b8e68cd25885f450b8 +size 14512 diff --git a/sft/checkpoint-11500/scaler.pt b/sft/checkpoint-11500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3545a15d53b4c66ccfbef6cc58e1f81f41444ef --- /dev/null +++ b/sft/checkpoint-11500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f359299d0abba3823e0577fab91e21df981d228ab52e2a184aa879caf820d06 +size 988 diff --git a/sft/checkpoint-11500/scheduler.pt b/sft/checkpoint-11500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd7d51de06e71836071fd492d5756456f1abb582 --- /dev/null +++ b/sft/checkpoint-11500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98e9a929d2c5129e7429d29459450cf33b2793285532758544a4da73fff79be +size 1064 diff --git a/sft/checkpoint-11500/special_tokens_map.json b/sft/checkpoint-11500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-11500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-11500/tokenizer.json b/sft/checkpoint-11500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-11500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-11500/tokenizer_config.json b/sft/checkpoint-11500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-11500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-11500/trainer_state.json b/sft/checkpoint-11500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..212034a74c242a8531c0cb6d2281bd1772fff7d2 --- /dev/null +++ b/sft/checkpoint-11500/trainer_state.json @@ -0,0 +1,1644 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.4801035263668716, + "eval_steps": 500, + "global_step": 11500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + }, + { + "epoch": 2.2752075919335706, + "grad_norm": 0.3063746690750122, + "learning_rate": 7.284299286856877e-06, + "loss": 0.8767, + "step": 10550 + }, + { + "epoch": 2.2859915884826916, + "grad_norm": 0.27114883065223694, + "learning_rate": 7.080152344204028e-06, + "loss": 0.8517, + "step": 10600 + }, + { + "epoch": 2.2967755850318126, + "grad_norm": 0.26992297172546387, + "learning_rate": 6.878433979915719e-06, + "loss": 0.873, + "step": 10650 + }, + { + "epoch": 2.307559581580934, + "grad_norm": 0.30842849612236023, + "learning_rate": 6.6791715317721075e-06, + "loss": 0.8645, + "step": 10700 + }, + { + "epoch": 2.318343578130055, + "grad_norm": 0.2740515172481537, + "learning_rate": 6.482392004716492e-06, + "loss": 0.8772, + "step": 10750 + }, + { + "epoch": 2.329127574679176, + "grad_norm": 0.28314441442489624, + "learning_rate": 6.288122067195592e-06, + "loss": 0.87, + "step": 10800 + }, + { + "epoch": 2.3399115712282974, + "grad_norm": 0.2951704263687134, + "learning_rate": 6.096388047545232e-06, + "loss": 0.8801, + "step": 10850 + }, + { + "epoch": 2.3506955677774184, + "grad_norm": 0.3134472966194153, + "learning_rate": 5.907215930422244e-06, + "loss": 0.8598, + "step": 10900 + }, + { + "epoch": 2.3614795643265394, + "grad_norm": 0.3114987313747406, + "learning_rate": 5.7206313532829095e-06, + "loss": 0.8578, + "step": 10950 + }, + { + "epoch": 2.3722635608756604, + "grad_norm": 0.3185006380081177, + "learning_rate": 5.5366596029084535e-06, + "loss": 0.8713, + "step": 11000 + }, + { + "epoch": 2.383047557424782, + "grad_norm": 0.31749865412712097, + "learning_rate": 5.355325611978049e-06, + "loss": 0.8558, + "step": 11050 + }, + { + "epoch": 2.393831553973903, + "grad_norm": 0.3048788905143738, + "learning_rate": 5.176653955689878e-06, + "loss": 0.8696, + "step": 11100 + }, + { + "epoch": 2.404615550523024, + "grad_norm": 0.33253952860832214, + "learning_rate": 5.0006688484305095e-06, + "loss": 0.864, + "step": 11150 + }, + { + "epoch": 2.4153995470721448, + "grad_norm": 0.29218512773513794, + "learning_rate": 4.827394140493341e-06, + "loss": 0.8723, + "step": 11200 + }, + { + "epoch": 2.426183543621266, + "grad_norm": 0.31756484508514404, + "learning_rate": 4.656853314846244e-06, + "loss": 0.8501, + "step": 11250 + }, + { + "epoch": 2.436967540170387, + "grad_norm": 0.33288106322288513, + "learning_rate": 4.4890694839490685e-06, + "loss": 0.8823, + "step": 11300 + }, + { + "epoch": 2.447751536719508, + "grad_norm": 0.2978493273258209, + "learning_rate": 4.3240653866213235e-06, + "loss": 0.871, + "step": 11350 + }, + { + "epoch": 2.458535533268629, + "grad_norm": 0.26963189244270325, + "learning_rate": 4.161863384960549e-06, + "loss": 0.8671, + "step": 11400 + }, + { + "epoch": 2.4693195298177506, + "grad_norm": 0.25764644145965576, + "learning_rate": 4.002485461311631e-06, + "loss": 0.8537, + "step": 11450 + }, + { + "epoch": 2.4801035263668716, + "grad_norm": 0.3499026298522949, + "learning_rate": 3.8459532152877425e-06, + "loss": 0.8574, + "step": 11500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.1470244998400705e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-11500/training_args.bin b/sft/checkpoint-11500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-11500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-12000/README.md b/sft/checkpoint-12000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-12000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-12000/adapter_config.json b/sft/checkpoint-12000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-12000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-12000/adapter_model.safetensors b/sft/checkpoint-12000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4935d30b6ff2c13f432cf9a241f7259314a3c04a --- /dev/null +++ b/sft/checkpoint-12000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5444ea282087159748591b8571306fc44fecb76ba1095e8c3f9023c63452258 +size 335604696 diff --git a/sft/checkpoint-12000/optimizer.pt b/sft/checkpoint-12000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c8c8037edde42ca655c3d82077438fbe76e75c7 --- /dev/null +++ b/sft/checkpoint-12000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fddd2f1639df90abe8e8560c14e2acfb17d3ad86484eedf56ad97755d24301d3 +size 671466706 diff --git a/sft/checkpoint-12000/rng_state_0.pth b/sft/checkpoint-12000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..291f1555b3bdc4ff3d4a069828462ccc69f93365 --- /dev/null +++ b/sft/checkpoint-12000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7e770fd5d80351a633d7952ab7ae8e284fbe96ba95e43face49f138b9cb9ca2 +size 14512 diff --git a/sft/checkpoint-12000/rng_state_1.pth b/sft/checkpoint-12000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c262ce5a700ed69823fd34524c6e62d83daebcc --- /dev/null +++ b/sft/checkpoint-12000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7b779e2d2d14371bbdef6c5f5ef87e8aee2df9067a442ace1184d5278f4878a +size 14512 diff --git a/sft/checkpoint-12000/scaler.pt b/sft/checkpoint-12000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ad0cb31f7bde7a59ab42b4809b39aa12988d781 --- /dev/null +++ b/sft/checkpoint-12000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021c2f937246a8c6305c3a711e8f5ed4e1fbc344c7f44944244f727eb8297b45 +size 988 diff --git a/sft/checkpoint-12000/scheduler.pt b/sft/checkpoint-12000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b80df4f4f38fb7ec3fb91e7c75dfbcb2267939a --- /dev/null +++ b/sft/checkpoint-12000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38fe078ebe24cb4cb3b26ee692176ed7e22bf32020cf06e9181488fced34a802 +size 1064 diff --git a/sft/checkpoint-12000/special_tokens_map.json b/sft/checkpoint-12000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-12000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-12000/tokenizer.json b/sft/checkpoint-12000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-12000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-12000/tokenizer_config.json b/sft/checkpoint-12000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-12000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-12000/trainer_state.json b/sft/checkpoint-12000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b8839264ad214bf86d1d4138da0901c80cc1df56 --- /dev/null +++ b/sft/checkpoint-12000/trainer_state.json @@ -0,0 +1,1714 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5879434918580824, + "eval_steps": 500, + "global_step": 12000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + }, + { + "epoch": 2.2752075919335706, + "grad_norm": 0.3063746690750122, + "learning_rate": 7.284299286856877e-06, + "loss": 0.8767, + "step": 10550 + }, + { + "epoch": 2.2859915884826916, + "grad_norm": 0.27114883065223694, + "learning_rate": 7.080152344204028e-06, + "loss": 0.8517, + "step": 10600 + }, + { + "epoch": 2.2967755850318126, + "grad_norm": 0.26992297172546387, + "learning_rate": 6.878433979915719e-06, + "loss": 0.873, + "step": 10650 + }, + { + "epoch": 2.307559581580934, + "grad_norm": 0.30842849612236023, + "learning_rate": 6.6791715317721075e-06, + "loss": 0.8645, + "step": 10700 + }, + { + "epoch": 2.318343578130055, + "grad_norm": 0.2740515172481537, + "learning_rate": 6.482392004716492e-06, + "loss": 0.8772, + "step": 10750 + }, + { + "epoch": 2.329127574679176, + "grad_norm": 0.28314441442489624, + "learning_rate": 6.288122067195592e-06, + "loss": 0.87, + "step": 10800 + }, + { + "epoch": 2.3399115712282974, + "grad_norm": 0.2951704263687134, + "learning_rate": 6.096388047545232e-06, + "loss": 0.8801, + "step": 10850 + }, + { + "epoch": 2.3506955677774184, + "grad_norm": 0.3134472966194153, + "learning_rate": 5.907215930422244e-06, + "loss": 0.8598, + "step": 10900 + }, + { + "epoch": 2.3614795643265394, + "grad_norm": 0.3114987313747406, + "learning_rate": 5.7206313532829095e-06, + "loss": 0.8578, + "step": 10950 + }, + { + "epoch": 2.3722635608756604, + "grad_norm": 0.3185006380081177, + "learning_rate": 5.5366596029084535e-06, + "loss": 0.8713, + "step": 11000 + }, + { + "epoch": 2.383047557424782, + "grad_norm": 0.31749865412712097, + "learning_rate": 5.355325611978049e-06, + "loss": 0.8558, + "step": 11050 + }, + { + "epoch": 2.393831553973903, + "grad_norm": 0.3048788905143738, + "learning_rate": 5.176653955689878e-06, + "loss": 0.8696, + "step": 11100 + }, + { + "epoch": 2.404615550523024, + "grad_norm": 0.33253952860832214, + "learning_rate": 5.0006688484305095e-06, + "loss": 0.864, + "step": 11150 + }, + { + "epoch": 2.4153995470721448, + "grad_norm": 0.29218512773513794, + "learning_rate": 4.827394140493341e-06, + "loss": 0.8723, + "step": 11200 + }, + { + "epoch": 2.426183543621266, + "grad_norm": 0.31756484508514404, + "learning_rate": 4.656853314846244e-06, + "loss": 0.8501, + "step": 11250 + }, + { + "epoch": 2.436967540170387, + "grad_norm": 0.33288106322288513, + "learning_rate": 4.4890694839490685e-06, + "loss": 0.8823, + "step": 11300 + }, + { + "epoch": 2.447751536719508, + "grad_norm": 0.2978493273258209, + "learning_rate": 4.3240653866213235e-06, + "loss": 0.871, + "step": 11350 + }, + { + "epoch": 2.458535533268629, + "grad_norm": 0.26963189244270325, + "learning_rate": 4.161863384960549e-06, + "loss": 0.8671, + "step": 11400 + }, + { + "epoch": 2.4693195298177506, + "grad_norm": 0.25764644145965576, + "learning_rate": 4.002485461311631e-06, + "loss": 0.8537, + "step": 11450 + }, + { + "epoch": 2.4801035263668716, + "grad_norm": 0.3499026298522949, + "learning_rate": 3.8459532152877425e-06, + "loss": 0.8574, + "step": 11500 + }, + { + "epoch": 2.4908875229159926, + "grad_norm": 0.30950355529785156, + "learning_rate": 3.6922878608430076e-06, + "loss": 0.8412, + "step": 11550 + }, + { + "epoch": 2.5016715194651136, + "grad_norm": 0.28624480962753296, + "learning_rate": 3.5415102233974844e-06, + "loss": 0.8573, + "step": 11600 + }, + { + "epoch": 2.512455516014235, + "grad_norm": 0.28546643257141113, + "learning_rate": 3.393640737014875e-06, + "loss": 0.861, + "step": 11650 + }, + { + "epoch": 2.523239512563356, + "grad_norm": 0.29488733410835266, + "learning_rate": 3.2486994416331405e-06, + "loss": 0.8696, + "step": 11700 + }, + { + "epoch": 2.534023509112477, + "grad_norm": 0.29552578926086426, + "learning_rate": 3.1067059803486285e-06, + "loss": 0.847, + "step": 11750 + }, + { + "epoch": 2.5448075056615984, + "grad_norm": 0.2657387852668762, + "learning_rate": 2.967679596753953e-06, + "loss": 0.8769, + "step": 11800 + }, + { + "epoch": 2.5555915022107194, + "grad_norm": 0.29935380816459656, + "learning_rate": 2.831639132330019e-06, + "loss": 0.8501, + "step": 11850 + }, + { + "epoch": 2.5663754987598404, + "grad_norm": 0.31909531354904175, + "learning_rate": 2.698603023892515e-06, + "loss": 0.8857, + "step": 11900 + }, + { + "epoch": 2.5771594953089614, + "grad_norm": 0.2908760905265808, + "learning_rate": 2.5685893010933133e-06, + "loss": 0.8364, + "step": 11950 + }, + { + "epoch": 2.5879434918580824, + "grad_norm": 0.3088342249393463, + "learning_rate": 2.4416155839769724e-06, + "loss": 0.8797, + "step": 12000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.370827634952582e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-12000/training_args.bin b/sft/checkpoint-12000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-12000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-12500/README.md b/sft/checkpoint-12500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-12500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-12500/adapter_config.json b/sft/checkpoint-12500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-12500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-12500/adapter_model.safetensors b/sft/checkpoint-12500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e9d35b59a36d37837f00b9ca684d90fbad6fdd8 --- /dev/null +++ b/sft/checkpoint-12500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d09f0b3c0d6e60046b7f0479df4ba1aeb125c12460338c028c86e2cbf6a2cc +size 335604696 diff --git a/sft/checkpoint-12500/optimizer.pt b/sft/checkpoint-12500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..868b2a3fb237f3a5215c098e3b9b587084aabe54 --- /dev/null +++ b/sft/checkpoint-12500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e509fa41384386983670435dadc712a76c07797575df4c8400749cf1889d01fe +size 671466706 diff --git a/sft/checkpoint-12500/rng_state_0.pth b/sft/checkpoint-12500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd2ac01d1772d688780dd9ae168bec94a30db5c3 --- /dev/null +++ b/sft/checkpoint-12500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d4ffc2130d15db1248a21bbaa3efbe74673a03691da763b3dc2c522b7edd3a +size 14512 diff --git a/sft/checkpoint-12500/rng_state_1.pth b/sft/checkpoint-12500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..72d94b98ce884b797446706ce9a952117d77270a --- /dev/null +++ b/sft/checkpoint-12500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b89758c6b20910a34c82a360a7c6525098588bc300962f6e7e066a369f5004e7 +size 14512 diff --git a/sft/checkpoint-12500/scaler.pt b/sft/checkpoint-12500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce2f43b579dd2cc9554ac51dc3478baa82345316 --- /dev/null +++ b/sft/checkpoint-12500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8520f8e129a9bb1e960bda1bf110e1da794936fd9569c60a57e6c83f34da7578 +size 988 diff --git a/sft/checkpoint-12500/scheduler.pt b/sft/checkpoint-12500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..da06a1cd6509de767e03a6852e015c1631faf443 --- /dev/null +++ b/sft/checkpoint-12500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37bc9c660f1f853b150673080e15c5bc757e395bf5ec1939da3b9ac52cc29ae8 +size 1064 diff --git a/sft/checkpoint-12500/special_tokens_map.json b/sft/checkpoint-12500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-12500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-12500/tokenizer.json b/sft/checkpoint-12500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-12500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-12500/tokenizer_config.json b/sft/checkpoint-12500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-12500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-12500/trainer_state.json b/sft/checkpoint-12500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d345713fdb8630d47065c4836c1418cac36fb5ad --- /dev/null +++ b/sft/checkpoint-12500/trainer_state.json @@ -0,0 +1,1784 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.6957834573492936, + "eval_steps": 500, + "global_step": 12500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + }, + { + "epoch": 2.2752075919335706, + "grad_norm": 0.3063746690750122, + "learning_rate": 7.284299286856877e-06, + "loss": 0.8767, + "step": 10550 + }, + { + "epoch": 2.2859915884826916, + "grad_norm": 0.27114883065223694, + "learning_rate": 7.080152344204028e-06, + "loss": 0.8517, + "step": 10600 + }, + { + "epoch": 2.2967755850318126, + "grad_norm": 0.26992297172546387, + "learning_rate": 6.878433979915719e-06, + "loss": 0.873, + "step": 10650 + }, + { + "epoch": 2.307559581580934, + "grad_norm": 0.30842849612236023, + "learning_rate": 6.6791715317721075e-06, + "loss": 0.8645, + "step": 10700 + }, + { + "epoch": 2.318343578130055, + "grad_norm": 0.2740515172481537, + "learning_rate": 6.482392004716492e-06, + "loss": 0.8772, + "step": 10750 + }, + { + "epoch": 2.329127574679176, + "grad_norm": 0.28314441442489624, + "learning_rate": 6.288122067195592e-06, + "loss": 0.87, + "step": 10800 + }, + { + "epoch": 2.3399115712282974, + "grad_norm": 0.2951704263687134, + "learning_rate": 6.096388047545232e-06, + "loss": 0.8801, + "step": 10850 + }, + { + "epoch": 2.3506955677774184, + "grad_norm": 0.3134472966194153, + "learning_rate": 5.907215930422244e-06, + "loss": 0.8598, + "step": 10900 + }, + { + "epoch": 2.3614795643265394, + "grad_norm": 0.3114987313747406, + "learning_rate": 5.7206313532829095e-06, + "loss": 0.8578, + "step": 10950 + }, + { + "epoch": 2.3722635608756604, + "grad_norm": 0.3185006380081177, + "learning_rate": 5.5366596029084535e-06, + "loss": 0.8713, + "step": 11000 + }, + { + "epoch": 2.383047557424782, + "grad_norm": 0.31749865412712097, + "learning_rate": 5.355325611978049e-06, + "loss": 0.8558, + "step": 11050 + }, + { + "epoch": 2.393831553973903, + "grad_norm": 0.3048788905143738, + "learning_rate": 5.176653955689878e-06, + "loss": 0.8696, + "step": 11100 + }, + { + "epoch": 2.404615550523024, + "grad_norm": 0.33253952860832214, + "learning_rate": 5.0006688484305095e-06, + "loss": 0.864, + "step": 11150 + }, + { + "epoch": 2.4153995470721448, + "grad_norm": 0.29218512773513794, + "learning_rate": 4.827394140493341e-06, + "loss": 0.8723, + "step": 11200 + }, + { + "epoch": 2.426183543621266, + "grad_norm": 0.31756484508514404, + "learning_rate": 4.656853314846244e-06, + "loss": 0.8501, + "step": 11250 + }, + { + "epoch": 2.436967540170387, + "grad_norm": 0.33288106322288513, + "learning_rate": 4.4890694839490685e-06, + "loss": 0.8823, + "step": 11300 + }, + { + "epoch": 2.447751536719508, + "grad_norm": 0.2978493273258209, + "learning_rate": 4.3240653866213235e-06, + "loss": 0.871, + "step": 11350 + }, + { + "epoch": 2.458535533268629, + "grad_norm": 0.26963189244270325, + "learning_rate": 4.161863384960549e-06, + "loss": 0.8671, + "step": 11400 + }, + { + "epoch": 2.4693195298177506, + "grad_norm": 0.25764644145965576, + "learning_rate": 4.002485461311631e-06, + "loss": 0.8537, + "step": 11450 + }, + { + "epoch": 2.4801035263668716, + "grad_norm": 0.3499026298522949, + "learning_rate": 3.8459532152877425e-06, + "loss": 0.8574, + "step": 11500 + }, + { + "epoch": 2.4908875229159926, + "grad_norm": 0.30950355529785156, + "learning_rate": 3.6922878608430076e-06, + "loss": 0.8412, + "step": 11550 + }, + { + "epoch": 2.5016715194651136, + "grad_norm": 0.28624480962753296, + "learning_rate": 3.5415102233974844e-06, + "loss": 0.8573, + "step": 11600 + }, + { + "epoch": 2.512455516014235, + "grad_norm": 0.28546643257141113, + "learning_rate": 3.393640737014875e-06, + "loss": 0.861, + "step": 11650 + }, + { + "epoch": 2.523239512563356, + "grad_norm": 0.29488733410835266, + "learning_rate": 3.2486994416331405e-06, + "loss": 0.8696, + "step": 11700 + }, + { + "epoch": 2.534023509112477, + "grad_norm": 0.29552578926086426, + "learning_rate": 3.1067059803486285e-06, + "loss": 0.847, + "step": 11750 + }, + { + "epoch": 2.5448075056615984, + "grad_norm": 0.2657387852668762, + "learning_rate": 2.967679596753953e-06, + "loss": 0.8769, + "step": 11800 + }, + { + "epoch": 2.5555915022107194, + "grad_norm": 0.29935380816459656, + "learning_rate": 2.831639132330019e-06, + "loss": 0.8501, + "step": 11850 + }, + { + "epoch": 2.5663754987598404, + "grad_norm": 0.31909531354904175, + "learning_rate": 2.698603023892515e-06, + "loss": 0.8857, + "step": 11900 + }, + { + "epoch": 2.5771594953089614, + "grad_norm": 0.2908760905265808, + "learning_rate": 2.5685893010933133e-06, + "loss": 0.8364, + "step": 11950 + }, + { + "epoch": 2.5879434918580824, + "grad_norm": 0.3088342249393463, + "learning_rate": 2.4416155839769724e-06, + "loss": 0.8797, + "step": 12000 + }, + { + "epoch": 2.598727488407204, + "grad_norm": 0.2893226742744446, + "learning_rate": 2.317699080592814e-06, + "loss": 0.876, + "step": 12050 + }, + { + "epoch": 2.609511484956325, + "grad_norm": 0.32050463557243347, + "learning_rate": 2.1968565846628013e-06, + "loss": 0.8733, + "step": 12100 + }, + { + "epoch": 2.620295481505446, + "grad_norm": 0.29306530952453613, + "learning_rate": 2.0791044733055736e-06, + "loss": 0.8418, + "step": 12150 + }, + { + "epoch": 2.631079478054567, + "grad_norm": 0.31728824973106384, + "learning_rate": 1.9644587048169545e-06, + "loss": 0.8645, + "step": 12200 + }, + { + "epoch": 2.641863474603688, + "grad_norm": 0.2657378315925598, + "learning_rate": 1.8529348165072209e-06, + "loss": 0.8963, + "step": 12250 + }, + { + "epoch": 2.652647471152809, + "grad_norm": 0.30267733335494995, + "learning_rate": 1.744547922595377e-06, + "loss": 0.8704, + "step": 12300 + }, + { + "epoch": 2.66343146770193, + "grad_norm": 0.28986382484436035, + "learning_rate": 1.639312712160862e-06, + "loss": 0.8573, + "step": 12350 + }, + { + "epoch": 2.674215464251051, + "grad_norm": 0.30205366015434265, + "learning_rate": 1.537243447152778e-06, + "loss": 0.8479, + "step": 12400 + }, + { + "epoch": 2.6849994608001726, + "grad_norm": 0.3245677053928375, + "learning_rate": 1.4383539604570674e-06, + "loss": 0.8543, + "step": 12450 + }, + { + "epoch": 2.6957834573492936, + "grad_norm": 0.3064013421535492, + "learning_rate": 1.3426576540218033e-06, + "loss": 0.8836, + "step": 12500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.5946307700650934e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-12500/training_args.bin b/sft/checkpoint-12500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-12500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-13000/README.md b/sft/checkpoint-13000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-13000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-13000/adapter_config.json b/sft/checkpoint-13000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-13000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-13000/adapter_model.safetensors b/sft/checkpoint-13000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b160be3d4521f9ebe7df266a785f500d61948b7 --- /dev/null +++ b/sft/checkpoint-13000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334260fc9ddfa27b59deb64e542fd64a638fb655620e162553c8b282aa22ccb8 +size 335604696 diff --git a/sft/checkpoint-13000/optimizer.pt b/sft/checkpoint-13000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..857bcf49165e4a6dde466aa207c52249d28ef530 --- /dev/null +++ b/sft/checkpoint-13000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6a5498534aba8f9d8e0cd95165924f0feb35c57026804da399f40edf6cccea +size 671466706 diff --git a/sft/checkpoint-13000/rng_state_0.pth b/sft/checkpoint-13000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..c11d280e553038e27f32b6c17cf8f79a876dd403 --- /dev/null +++ b/sft/checkpoint-13000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac94b0754554d13cb92abb8ed58dfb8108a38e4010e2fdfce15d6c5e5d1c1d7 +size 14512 diff --git a/sft/checkpoint-13000/rng_state_1.pth b/sft/checkpoint-13000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..6ee5ace9bb05fb6b3a63995b1e29c519f46874fb --- /dev/null +++ b/sft/checkpoint-13000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:158bb387fd3cce0759e30f70c05bac04649a22f37abbb495be071b8234fd36b1 +size 14512 diff --git a/sft/checkpoint-13000/scaler.pt b/sft/checkpoint-13000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b66a8a2b883c200f96c3c2ea0b278cc48712fb45 --- /dev/null +++ b/sft/checkpoint-13000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb95197093f8fc9cab3f204fa83b274f0fe4b3527d0fd6330a64571a5e27685e +size 988 diff --git a/sft/checkpoint-13000/scheduler.pt b/sft/checkpoint-13000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d60a246f65e16668f1a8e5ba29efe799e4632499 --- /dev/null +++ b/sft/checkpoint-13000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14fa2bb71ce81e200ceee93fdb9426a117209cdcde2bc8e7a3d06417277cebf0 +size 1064 diff --git a/sft/checkpoint-13000/special_tokens_map.json b/sft/checkpoint-13000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-13000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-13000/tokenizer.json b/sft/checkpoint-13000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-13000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-13000/tokenizer_config.json b/sft/checkpoint-13000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-13000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-13000/trainer_state.json b/sft/checkpoint-13000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ed8373f2d3390df4ce33458b7f17eff6b3c7024d --- /dev/null +++ b/sft/checkpoint-13000/trainer_state.json @@ -0,0 +1,1854 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.803623422840505, + "eval_steps": 500, + "global_step": 13000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + }, + { + "epoch": 2.2752075919335706, + "grad_norm": 0.3063746690750122, + "learning_rate": 7.284299286856877e-06, + "loss": 0.8767, + "step": 10550 + }, + { + "epoch": 2.2859915884826916, + "grad_norm": 0.27114883065223694, + "learning_rate": 7.080152344204028e-06, + "loss": 0.8517, + "step": 10600 + }, + { + "epoch": 2.2967755850318126, + "grad_norm": 0.26992297172546387, + "learning_rate": 6.878433979915719e-06, + "loss": 0.873, + "step": 10650 + }, + { + "epoch": 2.307559581580934, + "grad_norm": 0.30842849612236023, + "learning_rate": 6.6791715317721075e-06, + "loss": 0.8645, + "step": 10700 + }, + { + "epoch": 2.318343578130055, + "grad_norm": 0.2740515172481537, + "learning_rate": 6.482392004716492e-06, + "loss": 0.8772, + "step": 10750 + }, + { + "epoch": 2.329127574679176, + "grad_norm": 0.28314441442489624, + "learning_rate": 6.288122067195592e-06, + "loss": 0.87, + "step": 10800 + }, + { + "epoch": 2.3399115712282974, + "grad_norm": 0.2951704263687134, + "learning_rate": 6.096388047545232e-06, + "loss": 0.8801, + "step": 10850 + }, + { + "epoch": 2.3506955677774184, + "grad_norm": 0.3134472966194153, + "learning_rate": 5.907215930422244e-06, + "loss": 0.8598, + "step": 10900 + }, + { + "epoch": 2.3614795643265394, + "grad_norm": 0.3114987313747406, + "learning_rate": 5.7206313532829095e-06, + "loss": 0.8578, + "step": 10950 + }, + { + "epoch": 2.3722635608756604, + "grad_norm": 0.3185006380081177, + "learning_rate": 5.5366596029084535e-06, + "loss": 0.8713, + "step": 11000 + }, + { + "epoch": 2.383047557424782, + "grad_norm": 0.31749865412712097, + "learning_rate": 5.355325611978049e-06, + "loss": 0.8558, + "step": 11050 + }, + { + "epoch": 2.393831553973903, + "grad_norm": 0.3048788905143738, + "learning_rate": 5.176653955689878e-06, + "loss": 0.8696, + "step": 11100 + }, + { + "epoch": 2.404615550523024, + "grad_norm": 0.33253952860832214, + "learning_rate": 5.0006688484305095e-06, + "loss": 0.864, + "step": 11150 + }, + { + "epoch": 2.4153995470721448, + "grad_norm": 0.29218512773513794, + "learning_rate": 4.827394140493341e-06, + "loss": 0.8723, + "step": 11200 + }, + { + "epoch": 2.426183543621266, + "grad_norm": 0.31756484508514404, + "learning_rate": 4.656853314846244e-06, + "loss": 0.8501, + "step": 11250 + }, + { + "epoch": 2.436967540170387, + "grad_norm": 0.33288106322288513, + "learning_rate": 4.4890694839490685e-06, + "loss": 0.8823, + "step": 11300 + }, + { + "epoch": 2.447751536719508, + "grad_norm": 0.2978493273258209, + "learning_rate": 4.3240653866213235e-06, + "loss": 0.871, + "step": 11350 + }, + { + "epoch": 2.458535533268629, + "grad_norm": 0.26963189244270325, + "learning_rate": 4.161863384960549e-06, + "loss": 0.8671, + "step": 11400 + }, + { + "epoch": 2.4693195298177506, + "grad_norm": 0.25764644145965576, + "learning_rate": 4.002485461311631e-06, + "loss": 0.8537, + "step": 11450 + }, + { + "epoch": 2.4801035263668716, + "grad_norm": 0.3499026298522949, + "learning_rate": 3.8459532152877425e-06, + "loss": 0.8574, + "step": 11500 + }, + { + "epoch": 2.4908875229159926, + "grad_norm": 0.30950355529785156, + "learning_rate": 3.6922878608430076e-06, + "loss": 0.8412, + "step": 11550 + }, + { + "epoch": 2.5016715194651136, + "grad_norm": 0.28624480962753296, + "learning_rate": 3.5415102233974844e-06, + "loss": 0.8573, + "step": 11600 + }, + { + "epoch": 2.512455516014235, + "grad_norm": 0.28546643257141113, + "learning_rate": 3.393640737014875e-06, + "loss": 0.861, + "step": 11650 + }, + { + "epoch": 2.523239512563356, + "grad_norm": 0.29488733410835266, + "learning_rate": 3.2486994416331405e-06, + "loss": 0.8696, + "step": 11700 + }, + { + "epoch": 2.534023509112477, + "grad_norm": 0.29552578926086426, + "learning_rate": 3.1067059803486285e-06, + "loss": 0.847, + "step": 11750 + }, + { + "epoch": 2.5448075056615984, + "grad_norm": 0.2657387852668762, + "learning_rate": 2.967679596753953e-06, + "loss": 0.8769, + "step": 11800 + }, + { + "epoch": 2.5555915022107194, + "grad_norm": 0.29935380816459656, + "learning_rate": 2.831639132330019e-06, + "loss": 0.8501, + "step": 11850 + }, + { + "epoch": 2.5663754987598404, + "grad_norm": 0.31909531354904175, + "learning_rate": 2.698603023892515e-06, + "loss": 0.8857, + "step": 11900 + }, + { + "epoch": 2.5771594953089614, + "grad_norm": 0.2908760905265808, + "learning_rate": 2.5685893010933133e-06, + "loss": 0.8364, + "step": 11950 + }, + { + "epoch": 2.5879434918580824, + "grad_norm": 0.3088342249393463, + "learning_rate": 2.4416155839769724e-06, + "loss": 0.8797, + "step": 12000 + }, + { + "epoch": 2.598727488407204, + "grad_norm": 0.2893226742744446, + "learning_rate": 2.317699080592814e-06, + "loss": 0.876, + "step": 12050 + }, + { + "epoch": 2.609511484956325, + "grad_norm": 0.32050463557243347, + "learning_rate": 2.1968565846628013e-06, + "loss": 0.8733, + "step": 12100 + }, + { + "epoch": 2.620295481505446, + "grad_norm": 0.29306530952453613, + "learning_rate": 2.0791044733055736e-06, + "loss": 0.8418, + "step": 12150 + }, + { + "epoch": 2.631079478054567, + "grad_norm": 0.31728824973106384, + "learning_rate": 1.9644587048169545e-06, + "loss": 0.8645, + "step": 12200 + }, + { + "epoch": 2.641863474603688, + "grad_norm": 0.2657378315925598, + "learning_rate": 1.8529348165072209e-06, + "loss": 0.8963, + "step": 12250 + }, + { + "epoch": 2.652647471152809, + "grad_norm": 0.30267733335494995, + "learning_rate": 1.744547922595377e-06, + "loss": 0.8704, + "step": 12300 + }, + { + "epoch": 2.66343146770193, + "grad_norm": 0.28986382484436035, + "learning_rate": 1.639312712160862e-06, + "loss": 0.8573, + "step": 12350 + }, + { + "epoch": 2.674215464251051, + "grad_norm": 0.30205366015434265, + "learning_rate": 1.537243447152778e-06, + "loss": 0.8479, + "step": 12400 + }, + { + "epoch": 2.6849994608001726, + "grad_norm": 0.3245677053928375, + "learning_rate": 1.4383539604570674e-06, + "loss": 0.8543, + "step": 12450 + }, + { + "epoch": 2.6957834573492936, + "grad_norm": 0.3064013421535492, + "learning_rate": 1.3426576540218033e-06, + "loss": 0.8836, + "step": 12500 + }, + { + "epoch": 2.7065674538984146, + "grad_norm": 0.3094823956489563, + "learning_rate": 1.2501674970409217e-06, + "loss": 0.8732, + "step": 12550 + }, + { + "epoch": 2.717351450447536, + "grad_norm": 0.3052658438682556, + "learning_rate": 1.1608960241965393e-06, + "loss": 0.8875, + "step": 12600 + }, + { + "epoch": 2.728135446996657, + "grad_norm": 0.30494335293769836, + "learning_rate": 1.0748553339602452e-06, + "loss": 0.8603, + "step": 12650 + }, + { + "epoch": 2.738919443545778, + "grad_norm": 0.29887834191322327, + "learning_rate": 9.920570869534158e-07, + "loss": 0.868, + "step": 12700 + }, + { + "epoch": 2.7497034400948994, + "grad_norm": 0.26623794436454773, + "learning_rate": 9.125125043669497e-07, + "loss": 0.8328, + "step": 12750 + }, + { + "epoch": 2.7604874366440204, + "grad_norm": 0.31447434425354004, + "learning_rate": 8.362323664404925e-07, + "loss": 0.8303, + "step": 12800 + }, + { + "epoch": 2.7712714331931414, + "grad_norm": 0.3175729513168335, + "learning_rate": 7.6322701100148e-07, + "loss": 0.8707, + "step": 12850 + }, + { + "epoch": 2.7820554297422624, + "grad_norm": 0.3108314275741577, + "learning_rate": 6.935063320640811e-07, + "loss": 0.8679, + "step": 12900 + }, + { + "epoch": 2.7928394262913834, + "grad_norm": 0.30179736018180847, + "learning_rate": 6.270797784883431e-07, + "loss": 0.8519, + "step": 12950 + }, + { + "epoch": 2.803623422840505, + "grad_norm": 0.307117223739624, + "learning_rate": 5.639563526996073e-07, + "loss": 0.8787, + "step": 13000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.818433905177605e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-13000/training_args.bin b/sft/checkpoint-13000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-13000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-13500/README.md b/sft/checkpoint-13500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-13500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-13500/adapter_config.json b/sft/checkpoint-13500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-13500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-13500/adapter_model.safetensors b/sft/checkpoint-13500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d185e55c21e23a620671c4ab6381c982b16b4e6f --- /dev/null +++ b/sft/checkpoint-13500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9498e53999f257b9ab35f595b173495841b96824a334f6c82154e17b791e77e6 +size 335604696 diff --git a/sft/checkpoint-13500/optimizer.pt b/sft/checkpoint-13500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..65e56d7bd88dd85c4567a4616bd0dbaeb28a7719 --- /dev/null +++ b/sft/checkpoint-13500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616832f147fcb104b050e3fdf08c17f54305b85f88c8a4fc17d20833bce79267 +size 671466706 diff --git a/sft/checkpoint-13500/rng_state_0.pth b/sft/checkpoint-13500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f48d045d48f27a35d2c3466429e7e536ba0a52a --- /dev/null +++ b/sft/checkpoint-13500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb1dd38cea927206c58b9395d23ae7733e3eb94addafab6b17720faf894c903 +size 14512 diff --git a/sft/checkpoint-13500/rng_state_1.pth b/sft/checkpoint-13500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..e198658d2efaa55897d49ea4802b4358cc433afc --- /dev/null +++ b/sft/checkpoint-13500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e510e9c94af95cfdd86eb0fb6413c665b494a0e17c7693a8f977e24e32301d7 +size 14512 diff --git a/sft/checkpoint-13500/scaler.pt b/sft/checkpoint-13500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eee3cd1b6adf465cec7f3b066bde7afe144475c --- /dev/null +++ b/sft/checkpoint-13500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77fecb452546c11d526d1cb8b8427ae29276a007baf58f38d7ea49058ea26b7b +size 988 diff --git a/sft/checkpoint-13500/scheduler.pt b/sft/checkpoint-13500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01edde8b7d68f7b3478736caba0ac93c6fb05987 --- /dev/null +++ b/sft/checkpoint-13500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:166b783bf4bcee88749f7ff4320a1208fcba939bf500942f312a408f0b291dcc +size 1064 diff --git a/sft/checkpoint-13500/special_tokens_map.json b/sft/checkpoint-13500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-13500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-13500/tokenizer.json b/sft/checkpoint-13500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-13500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-13500/tokenizer_config.json b/sft/checkpoint-13500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-13500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-13500/trainer_state.json b/sft/checkpoint-13500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2b652a663ced2e2d0b9ffcd89a0c4b2a5ad8bf22 --- /dev/null +++ b/sft/checkpoint-13500/trainer_state.json @@ -0,0 +1,1924 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9114633883317156, + "eval_steps": 500, + "global_step": 13500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + }, + { + "epoch": 2.2752075919335706, + "grad_norm": 0.3063746690750122, + "learning_rate": 7.284299286856877e-06, + "loss": 0.8767, + "step": 10550 + }, + { + "epoch": 2.2859915884826916, + "grad_norm": 0.27114883065223694, + "learning_rate": 7.080152344204028e-06, + "loss": 0.8517, + "step": 10600 + }, + { + "epoch": 2.2967755850318126, + "grad_norm": 0.26992297172546387, + "learning_rate": 6.878433979915719e-06, + "loss": 0.873, + "step": 10650 + }, + { + "epoch": 2.307559581580934, + "grad_norm": 0.30842849612236023, + "learning_rate": 6.6791715317721075e-06, + "loss": 0.8645, + "step": 10700 + }, + { + "epoch": 2.318343578130055, + "grad_norm": 0.2740515172481537, + "learning_rate": 6.482392004716492e-06, + "loss": 0.8772, + "step": 10750 + }, + { + "epoch": 2.329127574679176, + "grad_norm": 0.28314441442489624, + "learning_rate": 6.288122067195592e-06, + "loss": 0.87, + "step": 10800 + }, + { + "epoch": 2.3399115712282974, + "grad_norm": 0.2951704263687134, + "learning_rate": 6.096388047545232e-06, + "loss": 0.8801, + "step": 10850 + }, + { + "epoch": 2.3506955677774184, + "grad_norm": 0.3134472966194153, + "learning_rate": 5.907215930422244e-06, + "loss": 0.8598, + "step": 10900 + }, + { + "epoch": 2.3614795643265394, + "grad_norm": 0.3114987313747406, + "learning_rate": 5.7206313532829095e-06, + "loss": 0.8578, + "step": 10950 + }, + { + "epoch": 2.3722635608756604, + "grad_norm": 0.3185006380081177, + "learning_rate": 5.5366596029084535e-06, + "loss": 0.8713, + "step": 11000 + }, + { + "epoch": 2.383047557424782, + "grad_norm": 0.31749865412712097, + "learning_rate": 5.355325611978049e-06, + "loss": 0.8558, + "step": 11050 + }, + { + "epoch": 2.393831553973903, + "grad_norm": 0.3048788905143738, + "learning_rate": 5.176653955689878e-06, + "loss": 0.8696, + "step": 11100 + }, + { + "epoch": 2.404615550523024, + "grad_norm": 0.33253952860832214, + "learning_rate": 5.0006688484305095e-06, + "loss": 0.864, + "step": 11150 + }, + { + "epoch": 2.4153995470721448, + "grad_norm": 0.29218512773513794, + "learning_rate": 4.827394140493341e-06, + "loss": 0.8723, + "step": 11200 + }, + { + "epoch": 2.426183543621266, + "grad_norm": 0.31756484508514404, + "learning_rate": 4.656853314846244e-06, + "loss": 0.8501, + "step": 11250 + }, + { + "epoch": 2.436967540170387, + "grad_norm": 0.33288106322288513, + "learning_rate": 4.4890694839490685e-06, + "loss": 0.8823, + "step": 11300 + }, + { + "epoch": 2.447751536719508, + "grad_norm": 0.2978493273258209, + "learning_rate": 4.3240653866213235e-06, + "loss": 0.871, + "step": 11350 + }, + { + "epoch": 2.458535533268629, + "grad_norm": 0.26963189244270325, + "learning_rate": 4.161863384960549e-06, + "loss": 0.8671, + "step": 11400 + }, + { + "epoch": 2.4693195298177506, + "grad_norm": 0.25764644145965576, + "learning_rate": 4.002485461311631e-06, + "loss": 0.8537, + "step": 11450 + }, + { + "epoch": 2.4801035263668716, + "grad_norm": 0.3499026298522949, + "learning_rate": 3.8459532152877425e-06, + "loss": 0.8574, + "step": 11500 + }, + { + "epoch": 2.4908875229159926, + "grad_norm": 0.30950355529785156, + "learning_rate": 3.6922878608430076e-06, + "loss": 0.8412, + "step": 11550 + }, + { + "epoch": 2.5016715194651136, + "grad_norm": 0.28624480962753296, + "learning_rate": 3.5415102233974844e-06, + "loss": 0.8573, + "step": 11600 + }, + { + "epoch": 2.512455516014235, + "grad_norm": 0.28546643257141113, + "learning_rate": 3.393640737014875e-06, + "loss": 0.861, + "step": 11650 + }, + { + "epoch": 2.523239512563356, + "grad_norm": 0.29488733410835266, + "learning_rate": 3.2486994416331405e-06, + "loss": 0.8696, + "step": 11700 + }, + { + "epoch": 2.534023509112477, + "grad_norm": 0.29552578926086426, + "learning_rate": 3.1067059803486285e-06, + "loss": 0.847, + "step": 11750 + }, + { + "epoch": 2.5448075056615984, + "grad_norm": 0.2657387852668762, + "learning_rate": 2.967679596753953e-06, + "loss": 0.8769, + "step": 11800 + }, + { + "epoch": 2.5555915022107194, + "grad_norm": 0.29935380816459656, + "learning_rate": 2.831639132330019e-06, + "loss": 0.8501, + "step": 11850 + }, + { + "epoch": 2.5663754987598404, + "grad_norm": 0.31909531354904175, + "learning_rate": 2.698603023892515e-06, + "loss": 0.8857, + "step": 11900 + }, + { + "epoch": 2.5771594953089614, + "grad_norm": 0.2908760905265808, + "learning_rate": 2.5685893010933133e-06, + "loss": 0.8364, + "step": 11950 + }, + { + "epoch": 2.5879434918580824, + "grad_norm": 0.3088342249393463, + "learning_rate": 2.4416155839769724e-06, + "loss": 0.8797, + "step": 12000 + }, + { + "epoch": 2.598727488407204, + "grad_norm": 0.2893226742744446, + "learning_rate": 2.317699080592814e-06, + "loss": 0.876, + "step": 12050 + }, + { + "epoch": 2.609511484956325, + "grad_norm": 0.32050463557243347, + "learning_rate": 2.1968565846628013e-06, + "loss": 0.8733, + "step": 12100 + }, + { + "epoch": 2.620295481505446, + "grad_norm": 0.29306530952453613, + "learning_rate": 2.0791044733055736e-06, + "loss": 0.8418, + "step": 12150 + }, + { + "epoch": 2.631079478054567, + "grad_norm": 0.31728824973106384, + "learning_rate": 1.9644587048169545e-06, + "loss": 0.8645, + "step": 12200 + }, + { + "epoch": 2.641863474603688, + "grad_norm": 0.2657378315925598, + "learning_rate": 1.8529348165072209e-06, + "loss": 0.8963, + "step": 12250 + }, + { + "epoch": 2.652647471152809, + "grad_norm": 0.30267733335494995, + "learning_rate": 1.744547922595377e-06, + "loss": 0.8704, + "step": 12300 + }, + { + "epoch": 2.66343146770193, + "grad_norm": 0.28986382484436035, + "learning_rate": 1.639312712160862e-06, + "loss": 0.8573, + "step": 12350 + }, + { + "epoch": 2.674215464251051, + "grad_norm": 0.30205366015434265, + "learning_rate": 1.537243447152778e-06, + "loss": 0.8479, + "step": 12400 + }, + { + "epoch": 2.6849994608001726, + "grad_norm": 0.3245677053928375, + "learning_rate": 1.4383539604570674e-06, + "loss": 0.8543, + "step": 12450 + }, + { + "epoch": 2.6957834573492936, + "grad_norm": 0.3064013421535492, + "learning_rate": 1.3426576540218033e-06, + "loss": 0.8836, + "step": 12500 + }, + { + "epoch": 2.7065674538984146, + "grad_norm": 0.3094823956489563, + "learning_rate": 1.2501674970409217e-06, + "loss": 0.8732, + "step": 12550 + }, + { + "epoch": 2.717351450447536, + "grad_norm": 0.3052658438682556, + "learning_rate": 1.1608960241965393e-06, + "loss": 0.8875, + "step": 12600 + }, + { + "epoch": 2.728135446996657, + "grad_norm": 0.30494335293769836, + "learning_rate": 1.0748553339602452e-06, + "loss": 0.8603, + "step": 12650 + }, + { + "epoch": 2.738919443545778, + "grad_norm": 0.29887834191322327, + "learning_rate": 9.920570869534158e-07, + "loss": 0.868, + "step": 12700 + }, + { + "epoch": 2.7497034400948994, + "grad_norm": 0.26623794436454773, + "learning_rate": 9.125125043669497e-07, + "loss": 0.8328, + "step": 12750 + }, + { + "epoch": 2.7604874366440204, + "grad_norm": 0.31447434425354004, + "learning_rate": 8.362323664404925e-07, + "loss": 0.8303, + "step": 12800 + }, + { + "epoch": 2.7712714331931414, + "grad_norm": 0.3175729513168335, + "learning_rate": 7.6322701100148e-07, + "loss": 0.8707, + "step": 12850 + }, + { + "epoch": 2.7820554297422624, + "grad_norm": 0.3108314275741577, + "learning_rate": 6.935063320640811e-07, + "loss": 0.8679, + "step": 12900 + }, + { + "epoch": 2.7928394262913834, + "grad_norm": 0.30179736018180847, + "learning_rate": 6.270797784883431e-07, + "loss": 0.8519, + "step": 12950 + }, + { + "epoch": 2.803623422840505, + "grad_norm": 0.307117223739624, + "learning_rate": 5.639563526996073e-07, + "loss": 0.8787, + "step": 13000 + }, + { + "epoch": 2.814407419389626, + "grad_norm": 0.2920386493206024, + "learning_rate": 5.041446094684987e-07, + "loss": 0.8828, + "step": 13050 + }, + { + "epoch": 2.825191415938747, + "grad_norm": 0.2995089292526245, + "learning_rate": 4.476526547515125e-07, + "loss": 0.8751, + "step": 13100 + }, + { + "epoch": 2.835975412487868, + "grad_norm": 0.3088924288749695, + "learning_rate": 3.9448814459247527e-07, + "loss": 0.8836, + "step": 13150 + }, + { + "epoch": 2.846759409036989, + "grad_norm": 0.3009397089481354, + "learning_rate": 3.4465828408495326e-07, + "loss": 0.8725, + "step": 13200 + }, + { + "epoch": 2.85754340558611, + "grad_norm": 0.2953643202781677, + "learning_rate": 2.981698263958055e-07, + "loss": 0.8527, + "step": 13250 + }, + { + "epoch": 2.868327402135231, + "grad_norm": 0.29561954736709595, + "learning_rate": 2.5502907184992985e-07, + "loss": 0.8637, + "step": 13300 + }, + { + "epoch": 2.879111398684352, + "grad_norm": 0.29808613657951355, + "learning_rate": 2.1524186707645167e-07, + "loss": 0.8392, + "step": 13350 + }, + { + "epoch": 2.8898953952334736, + "grad_norm": 0.3293234407901764, + "learning_rate": 1.7881360421633798e-07, + "loss": 0.8883, + "step": 13400 + }, + { + "epoch": 2.9006793917825946, + "grad_norm": 0.3593480587005615, + "learning_rate": 1.457492201916405e-07, + "loss": 0.8783, + "step": 13450 + }, + { + "epoch": 2.9114633883317156, + "grad_norm": 0.26941925287246704, + "learning_rate": 1.1605319603641973e-07, + "loss": 0.838, + "step": 13500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.042237040290116e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-13500/training_args.bin b/sft/checkpoint-13500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-13500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-13911/README.md b/sft/checkpoint-13911/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-13911/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-13911/adapter_config.json b/sft/checkpoint-13911/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-13911/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-13911/adapter_model.safetensors b/sft/checkpoint-13911/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..035f8dc9e20a825ab1d5dced50552e5a51cb8597 --- /dev/null +++ b/sft/checkpoint-13911/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db9062ddd9124c71dc4444b0caef7c0e0891e8e5169d33b082970a973b6a5c9 +size 335604696 diff --git a/sft/checkpoint-13911/optimizer.pt b/sft/checkpoint-13911/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8fee5216a4d04bf85402b3618f8c5369ba0e141 --- /dev/null +++ b/sft/checkpoint-13911/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f411f54b621deedc23a2b0504e68bf23bf5fa867f8657072fe27ad436cdc7da +size 671466706 diff --git a/sft/checkpoint-13911/rng_state_0.pth b/sft/checkpoint-13911/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc2a18c333ebd2958e64017a86e18b10af3d1a14 --- /dev/null +++ b/sft/checkpoint-13911/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c45fe34c2a7ce69ceb04b413c69648e73af6c652508de87048807e8572590a5 +size 14512 diff --git a/sft/checkpoint-13911/rng_state_1.pth b/sft/checkpoint-13911/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..126889aa46a78d04084f6ca99644c1390296c6a6 --- /dev/null +++ b/sft/checkpoint-13911/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4e342f67568e28216dad34f4b21ae346f680dbf5f4b3f21cea5477a864c6f1 +size 14512 diff --git a/sft/checkpoint-13911/scaler.pt b/sft/checkpoint-13911/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ce9820766205d99af96237ef6eec50390eb95a7 --- /dev/null +++ b/sft/checkpoint-13911/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3e73607383423ec5ba23b5d4a85f683cfb8f0ee413d012886266a0524da31d +size 988 diff --git a/sft/checkpoint-13911/scheduler.pt b/sft/checkpoint-13911/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9be8bffb0ea3db3cf8927f35b0dacdd8bfc923b2 --- /dev/null +++ b/sft/checkpoint-13911/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0afb2692ef0da27a291a3adae792d2b1f38cff173cbb9f392cd7b165c2f92b8 +size 1064 diff --git a/sft/checkpoint-13911/special_tokens_map.json b/sft/checkpoint-13911/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-13911/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-13911/tokenizer.json b/sft/checkpoint-13911/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-13911/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-13911/tokenizer_config.json b/sft/checkpoint-13911/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-13911/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-13911/trainer_state.json b/sft/checkpoint-13911/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3115a45f0b640c3f6bceed684a728fcd921fa371 --- /dev/null +++ b/sft/checkpoint-13911/trainer_state.json @@ -0,0 +1,1980 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 13911, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + }, + { + "epoch": 2.0595276609511486, + "grad_norm": 0.2882542908191681, + "learning_rate": 1.1827407857980522e-05, + "loss": 0.8754, + "step": 9550 + }, + { + "epoch": 2.0703116575002696, + "grad_norm": 0.33889445662498474, + "learning_rate": 1.1580944627437052e-05, + "loss": 0.8645, + "step": 9600 + }, + { + "epoch": 2.0810956540493906, + "grad_norm": 0.29919326305389404, + "learning_rate": 1.1336300007641628e-05, + "loss": 0.8685, + "step": 9650 + }, + { + "epoch": 2.091879650598512, + "grad_norm": 0.2923993468284607, + "learning_rate": 1.1098344650456325e-05, + "loss": 0.8577, + "step": 9700 + }, + { + "epoch": 2.102663647147633, + "grad_norm": 0.2865777611732483, + "learning_rate": 1.0857398452987955e-05, + "loss": 0.8968, + "step": 9750 + }, + { + "epoch": 2.113447643696754, + "grad_norm": 0.28677886724472046, + "learning_rate": 1.0618368924500005e-05, + "loss": 0.8678, + "step": 9800 + }, + { + "epoch": 2.124231640245875, + "grad_norm": 0.2737389802932739, + "learning_rate": 1.0381288459349405e-05, + "loss": 0.8865, + "step": 9850 + }, + { + "epoch": 2.1350156367949964, + "grad_norm": 0.27073368430137634, + "learning_rate": 1.0146189187747276e-05, + "loss": 0.8733, + "step": 9900 + }, + { + "epoch": 2.1457996333441174, + "grad_norm": 0.280775785446167, + "learning_rate": 9.913102971404456e-06, + "loss": 0.8408, + "step": 9950 + }, + { + "epoch": 2.1565836298932384, + "grad_norm": 0.2671400308609009, + "learning_rate": 9.682061399213525e-06, + "loss": 0.8792, + "step": 10000 + }, + { + "epoch": 2.1673676264423594, + "grad_norm": 0.3240983188152313, + "learning_rate": 9.45309578296762e-06, + "loss": 0.8739, + "step": 10050 + }, + { + "epoch": 2.178151622991481, + "grad_norm": 0.30578577518463135, + "learning_rate": 9.226237153117056e-06, + "loss": 0.8731, + "step": 10100 + }, + { + "epoch": 2.188935619540602, + "grad_norm": 0.2961669862270355, + "learning_rate": 9.001516254563835e-06, + "loss": 0.8861, + "step": 10150 + }, + { + "epoch": 2.1997196160897228, + "grad_norm": 0.31330254673957825, + "learning_rate": 8.778963542495015e-06, + "loss": 0.8327, + "step": 10200 + }, + { + "epoch": 2.2105036126388438, + "grad_norm": 0.3293406665325165, + "learning_rate": 8.558609178255252e-06, + "loss": 0.8567, + "step": 10250 + }, + { + "epoch": 2.221287609187965, + "grad_norm": 0.3065802752971649, + "learning_rate": 8.340483025259233e-06, + "loss": 0.8515, + "step": 10300 + }, + { + "epoch": 2.232071605737086, + "grad_norm": 0.2637750208377838, + "learning_rate": 8.124614644944412e-06, + "loss": 0.874, + "step": 10350 + }, + { + "epoch": 2.242855602286207, + "grad_norm": 0.26482629776000977, + "learning_rate": 7.911033292764774e-06, + "loss": 0.8373, + "step": 10400 + }, + { + "epoch": 2.2536395988353286, + "grad_norm": 0.27340102195739746, + "learning_rate": 7.699767914225903e-06, + "loss": 0.9063, + "step": 10450 + }, + { + "epoch": 2.2644235953844496, + "grad_norm": 0.25882843136787415, + "learning_rate": 7.490847140962273e-06, + "loss": 0.8377, + "step": 10500 + }, + { + "epoch": 2.2752075919335706, + "grad_norm": 0.3063746690750122, + "learning_rate": 7.284299286856877e-06, + "loss": 0.8767, + "step": 10550 + }, + { + "epoch": 2.2859915884826916, + "grad_norm": 0.27114883065223694, + "learning_rate": 7.080152344204028e-06, + "loss": 0.8517, + "step": 10600 + }, + { + "epoch": 2.2967755850318126, + "grad_norm": 0.26992297172546387, + "learning_rate": 6.878433979915719e-06, + "loss": 0.873, + "step": 10650 + }, + { + "epoch": 2.307559581580934, + "grad_norm": 0.30842849612236023, + "learning_rate": 6.6791715317721075e-06, + "loss": 0.8645, + "step": 10700 + }, + { + "epoch": 2.318343578130055, + "grad_norm": 0.2740515172481537, + "learning_rate": 6.482392004716492e-06, + "loss": 0.8772, + "step": 10750 + }, + { + "epoch": 2.329127574679176, + "grad_norm": 0.28314441442489624, + "learning_rate": 6.288122067195592e-06, + "loss": 0.87, + "step": 10800 + }, + { + "epoch": 2.3399115712282974, + "grad_norm": 0.2951704263687134, + "learning_rate": 6.096388047545232e-06, + "loss": 0.8801, + "step": 10850 + }, + { + "epoch": 2.3506955677774184, + "grad_norm": 0.3134472966194153, + "learning_rate": 5.907215930422244e-06, + "loss": 0.8598, + "step": 10900 + }, + { + "epoch": 2.3614795643265394, + "grad_norm": 0.3114987313747406, + "learning_rate": 5.7206313532829095e-06, + "loss": 0.8578, + "step": 10950 + }, + { + "epoch": 2.3722635608756604, + "grad_norm": 0.3185006380081177, + "learning_rate": 5.5366596029084535e-06, + "loss": 0.8713, + "step": 11000 + }, + { + "epoch": 2.383047557424782, + "grad_norm": 0.31749865412712097, + "learning_rate": 5.355325611978049e-06, + "loss": 0.8558, + "step": 11050 + }, + { + "epoch": 2.393831553973903, + "grad_norm": 0.3048788905143738, + "learning_rate": 5.176653955689878e-06, + "loss": 0.8696, + "step": 11100 + }, + { + "epoch": 2.404615550523024, + "grad_norm": 0.33253952860832214, + "learning_rate": 5.0006688484305095e-06, + "loss": 0.864, + "step": 11150 + }, + { + "epoch": 2.4153995470721448, + "grad_norm": 0.29218512773513794, + "learning_rate": 4.827394140493341e-06, + "loss": 0.8723, + "step": 11200 + }, + { + "epoch": 2.426183543621266, + "grad_norm": 0.31756484508514404, + "learning_rate": 4.656853314846244e-06, + "loss": 0.8501, + "step": 11250 + }, + { + "epoch": 2.436967540170387, + "grad_norm": 0.33288106322288513, + "learning_rate": 4.4890694839490685e-06, + "loss": 0.8823, + "step": 11300 + }, + { + "epoch": 2.447751536719508, + "grad_norm": 0.2978493273258209, + "learning_rate": 4.3240653866213235e-06, + "loss": 0.871, + "step": 11350 + }, + { + "epoch": 2.458535533268629, + "grad_norm": 0.26963189244270325, + "learning_rate": 4.161863384960549e-06, + "loss": 0.8671, + "step": 11400 + }, + { + "epoch": 2.4693195298177506, + "grad_norm": 0.25764644145965576, + "learning_rate": 4.002485461311631e-06, + "loss": 0.8537, + "step": 11450 + }, + { + "epoch": 2.4801035263668716, + "grad_norm": 0.3499026298522949, + "learning_rate": 3.8459532152877425e-06, + "loss": 0.8574, + "step": 11500 + }, + { + "epoch": 2.4908875229159926, + "grad_norm": 0.30950355529785156, + "learning_rate": 3.6922878608430076e-06, + "loss": 0.8412, + "step": 11550 + }, + { + "epoch": 2.5016715194651136, + "grad_norm": 0.28624480962753296, + "learning_rate": 3.5415102233974844e-06, + "loss": 0.8573, + "step": 11600 + }, + { + "epoch": 2.512455516014235, + "grad_norm": 0.28546643257141113, + "learning_rate": 3.393640737014875e-06, + "loss": 0.861, + "step": 11650 + }, + { + "epoch": 2.523239512563356, + "grad_norm": 0.29488733410835266, + "learning_rate": 3.2486994416331405e-06, + "loss": 0.8696, + "step": 11700 + }, + { + "epoch": 2.534023509112477, + "grad_norm": 0.29552578926086426, + "learning_rate": 3.1067059803486285e-06, + "loss": 0.847, + "step": 11750 + }, + { + "epoch": 2.5448075056615984, + "grad_norm": 0.2657387852668762, + "learning_rate": 2.967679596753953e-06, + "loss": 0.8769, + "step": 11800 + }, + { + "epoch": 2.5555915022107194, + "grad_norm": 0.29935380816459656, + "learning_rate": 2.831639132330019e-06, + "loss": 0.8501, + "step": 11850 + }, + { + "epoch": 2.5663754987598404, + "grad_norm": 0.31909531354904175, + "learning_rate": 2.698603023892515e-06, + "loss": 0.8857, + "step": 11900 + }, + { + "epoch": 2.5771594953089614, + "grad_norm": 0.2908760905265808, + "learning_rate": 2.5685893010933133e-06, + "loss": 0.8364, + "step": 11950 + }, + { + "epoch": 2.5879434918580824, + "grad_norm": 0.3088342249393463, + "learning_rate": 2.4416155839769724e-06, + "loss": 0.8797, + "step": 12000 + }, + { + "epoch": 2.598727488407204, + "grad_norm": 0.2893226742744446, + "learning_rate": 2.317699080592814e-06, + "loss": 0.876, + "step": 12050 + }, + { + "epoch": 2.609511484956325, + "grad_norm": 0.32050463557243347, + "learning_rate": 2.1968565846628013e-06, + "loss": 0.8733, + "step": 12100 + }, + { + "epoch": 2.620295481505446, + "grad_norm": 0.29306530952453613, + "learning_rate": 2.0791044733055736e-06, + "loss": 0.8418, + "step": 12150 + }, + { + "epoch": 2.631079478054567, + "grad_norm": 0.31728824973106384, + "learning_rate": 1.9644587048169545e-06, + "loss": 0.8645, + "step": 12200 + }, + { + "epoch": 2.641863474603688, + "grad_norm": 0.2657378315925598, + "learning_rate": 1.8529348165072209e-06, + "loss": 0.8963, + "step": 12250 + }, + { + "epoch": 2.652647471152809, + "grad_norm": 0.30267733335494995, + "learning_rate": 1.744547922595377e-06, + "loss": 0.8704, + "step": 12300 + }, + { + "epoch": 2.66343146770193, + "grad_norm": 0.28986382484436035, + "learning_rate": 1.639312712160862e-06, + "loss": 0.8573, + "step": 12350 + }, + { + "epoch": 2.674215464251051, + "grad_norm": 0.30205366015434265, + "learning_rate": 1.537243447152778e-06, + "loss": 0.8479, + "step": 12400 + }, + { + "epoch": 2.6849994608001726, + "grad_norm": 0.3245677053928375, + "learning_rate": 1.4383539604570674e-06, + "loss": 0.8543, + "step": 12450 + }, + { + "epoch": 2.6957834573492936, + "grad_norm": 0.3064013421535492, + "learning_rate": 1.3426576540218033e-06, + "loss": 0.8836, + "step": 12500 + }, + { + "epoch": 2.7065674538984146, + "grad_norm": 0.3094823956489563, + "learning_rate": 1.2501674970409217e-06, + "loss": 0.8732, + "step": 12550 + }, + { + "epoch": 2.717351450447536, + "grad_norm": 0.3052658438682556, + "learning_rate": 1.1608960241965393e-06, + "loss": 0.8875, + "step": 12600 + }, + { + "epoch": 2.728135446996657, + "grad_norm": 0.30494335293769836, + "learning_rate": 1.0748553339602452e-06, + "loss": 0.8603, + "step": 12650 + }, + { + "epoch": 2.738919443545778, + "grad_norm": 0.29887834191322327, + "learning_rate": 9.920570869534158e-07, + "loss": 0.868, + "step": 12700 + }, + { + "epoch": 2.7497034400948994, + "grad_norm": 0.26623794436454773, + "learning_rate": 9.125125043669497e-07, + "loss": 0.8328, + "step": 12750 + }, + { + "epoch": 2.7604874366440204, + "grad_norm": 0.31447434425354004, + "learning_rate": 8.362323664404925e-07, + "loss": 0.8303, + "step": 12800 + }, + { + "epoch": 2.7712714331931414, + "grad_norm": 0.3175729513168335, + "learning_rate": 7.6322701100148e-07, + "loss": 0.8707, + "step": 12850 + }, + { + "epoch": 2.7820554297422624, + "grad_norm": 0.3108314275741577, + "learning_rate": 6.935063320640811e-07, + "loss": 0.8679, + "step": 12900 + }, + { + "epoch": 2.7928394262913834, + "grad_norm": 0.30179736018180847, + "learning_rate": 6.270797784883431e-07, + "loss": 0.8519, + "step": 12950 + }, + { + "epoch": 2.803623422840505, + "grad_norm": 0.307117223739624, + "learning_rate": 5.639563526996073e-07, + "loss": 0.8787, + "step": 13000 + }, + { + "epoch": 2.814407419389626, + "grad_norm": 0.2920386493206024, + "learning_rate": 5.041446094684987e-07, + "loss": 0.8828, + "step": 13050 + }, + { + "epoch": 2.825191415938747, + "grad_norm": 0.2995089292526245, + "learning_rate": 4.476526547515125e-07, + "loss": 0.8751, + "step": 13100 + }, + { + "epoch": 2.835975412487868, + "grad_norm": 0.3088924288749695, + "learning_rate": 3.9448814459247527e-07, + "loss": 0.8836, + "step": 13150 + }, + { + "epoch": 2.846759409036989, + "grad_norm": 0.3009397089481354, + "learning_rate": 3.4465828408495326e-07, + "loss": 0.8725, + "step": 13200 + }, + { + "epoch": 2.85754340558611, + "grad_norm": 0.2953643202781677, + "learning_rate": 2.981698263958055e-07, + "loss": 0.8527, + "step": 13250 + }, + { + "epoch": 2.868327402135231, + "grad_norm": 0.29561954736709595, + "learning_rate": 2.5502907184992985e-07, + "loss": 0.8637, + "step": 13300 + }, + { + "epoch": 2.879111398684352, + "grad_norm": 0.29808613657951355, + "learning_rate": 2.1524186707645167e-07, + "loss": 0.8392, + "step": 13350 + }, + { + "epoch": 2.8898953952334736, + "grad_norm": 0.3293234407901764, + "learning_rate": 1.7881360421633798e-07, + "loss": 0.8883, + "step": 13400 + }, + { + "epoch": 2.9006793917825946, + "grad_norm": 0.3593480587005615, + "learning_rate": 1.457492201916405e-07, + "loss": 0.8783, + "step": 13450 + }, + { + "epoch": 2.9114633883317156, + "grad_norm": 0.26941925287246704, + "learning_rate": 1.1605319603641973e-07, + "loss": 0.838, + "step": 13500 + }, + { + "epoch": 2.922247384880837, + "grad_norm": 0.3049161434173584, + "learning_rate": 8.972955628945012e-08, + "loss": 0.8738, + "step": 13550 + }, + { + "epoch": 2.933031381429958, + "grad_norm": 0.2638503313064575, + "learning_rate": 6.678186844880641e-08, + "loss": 0.845, + "step": 13600 + }, + { + "epoch": 2.943815377979079, + "grad_norm": 0.3131190538406372, + "learning_rate": 4.721324248836978e-08, + "loss": 0.8371, + "step": 13650 + }, + { + "epoch": 2.9545993745282004, + "grad_norm": 0.31689196825027466, + "learning_rate": 3.102633043635672e-08, + "loss": 0.8923, + "step": 13700 + }, + { + "epoch": 2.9653833710773214, + "grad_norm": 0.2884679138660431, + "learning_rate": 1.8223326015906483e-08, + "loss": 0.8558, + "step": 13750 + }, + { + "epoch": 2.9761673676264424, + "grad_norm": 0.2746738791465759, + "learning_rate": 8.8059643477717e-09, + "loss": 0.8576, + "step": 13800 + }, + { + "epoch": 2.9869513641755634, + "grad_norm": 0.30393725633621216, + "learning_rate": 2.7755217151648193e-09, + "loss": 0.8458, + "step": 13850 + }, + { + "epoch": 2.9977353607246844, + "grad_norm": 0.3150463402271271, + "learning_rate": 1.3281539079923465e-10, + "loss": 0.8992, + "step": 13900 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.225979414269028e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-13911/training_args.bin b/sft/checkpoint-13911/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-13911/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-4500/README.md b/sft/checkpoint-4500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-4500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-4500/adapter_config.json b/sft/checkpoint-4500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-4500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-4500/adapter_model.safetensors b/sft/checkpoint-4500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cec5061b7ad3dfa455e6db8e6769be813ae9f529 --- /dev/null +++ b/sft/checkpoint-4500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:995e1e5c8c146552c365d741d9f51a338d78e0efe33a751a14d5448ead68fe2d +size 335604696 diff --git a/sft/checkpoint-4500/optimizer.pt b/sft/checkpoint-4500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f678afc14c95ef432c34cdf440ac0377d7a49d8 --- /dev/null +++ b/sft/checkpoint-4500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8529ed20cd150692755e831eedc9270226fc14891dc7677a2b365d2a936d4443 +size 671466706 diff --git a/sft/checkpoint-4500/rng_state_0.pth b/sft/checkpoint-4500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..fce6add04c8425289ebbb51eb9aab0bb23431878 --- /dev/null +++ b/sft/checkpoint-4500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bdd6bea78a5d7cbf9076a2e823dc5648b9ffff9bd6bc7b584da94643b0a3f5a +size 14512 diff --git a/sft/checkpoint-4500/rng_state_1.pth b/sft/checkpoint-4500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c5521aed7a86c5a5e03d7266c80615929726521c --- /dev/null +++ b/sft/checkpoint-4500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8756a6ffe8e38d5fbf684aafbcfdb9d230f8606df5bc9a1a96356346c450ed8a +size 14512 diff --git a/sft/checkpoint-4500/scaler.pt b/sft/checkpoint-4500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eb1718239e9101b6a6e609988d0ab4c61c50ef6 --- /dev/null +++ b/sft/checkpoint-4500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25aed798df1ea1f11d3cdd0a7de18dd9b632beaa14981af70f644c899539b4cc +size 988 diff --git a/sft/checkpoint-4500/scheduler.pt b/sft/checkpoint-4500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dac827818e83044fd3e2ba1f6c8a16dc460a925 --- /dev/null +++ b/sft/checkpoint-4500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8fbecb9e857496fb89d3f22b3abf86f676edbcbdd2934d9310d3bee13c1500 +size 1064 diff --git a/sft/checkpoint-4500/special_tokens_map.json b/sft/checkpoint-4500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-4500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-4500/tokenizer.json b/sft/checkpoint-4500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-4500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-4500/tokenizer_config.json b/sft/checkpoint-4500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-4500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-4500/trainer_state.json b/sft/checkpoint-4500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1a8590a9ddbf8d3de91fecbe16c2af23b8fd1d15 --- /dev/null +++ b/sft/checkpoint-4500/trainer_state.json @@ -0,0 +1,664 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9705596894208994, + "eval_steps": 500, + "global_step": 4500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.0142282160126034e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-4500/training_args.bin b/sft/checkpoint-4500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-4500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-5000/README.md b/sft/checkpoint-5000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-5000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-5000/adapter_config.json b/sft/checkpoint-5000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-5000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-5000/adapter_model.safetensors b/sft/checkpoint-5000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33c67892a5707e9aece86c504800c084949515af --- /dev/null +++ b/sft/checkpoint-5000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7d4d5a5837080543dd943a6207abf599c4b0f01cf9f3a65a88029240db204f +size 335604696 diff --git a/sft/checkpoint-5000/optimizer.pt b/sft/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..733b1f7f8db80907cd2d40408cc137123ca858ad --- /dev/null +++ b/sft/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851f52dd3e58c4abe6b279f4449f83cd454f63f85e57ffabf8046a96472a6f50 +size 671466706 diff --git a/sft/checkpoint-5000/rng_state_0.pth b/sft/checkpoint-5000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..c44fb10f258295502c7ff905699accf0a12a106f --- /dev/null +++ b/sft/checkpoint-5000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14d93050c566255fd6810aca21301738ee2004cd1f89862c7952596a29c5f8b +size 14512 diff --git a/sft/checkpoint-5000/rng_state_1.pth b/sft/checkpoint-5000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..93c9967a49b0279ace5ead1e857aee5a72ce464d --- /dev/null +++ b/sft/checkpoint-5000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b2af14231134d9b1092a19c594a67d4e5a239406490f17f7e8117c73760ce94 +size 14512 diff --git a/sft/checkpoint-5000/scaler.pt b/sft/checkpoint-5000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..da81849ef9b20a8870285eaa92c68ae84344e397 --- /dev/null +++ b/sft/checkpoint-5000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41216328ab75de937007afb04d76156949bedb908461001a451c8991c2ba8cca +size 988 diff --git a/sft/checkpoint-5000/scheduler.pt b/sft/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed5dd2dc90faf58c72a19f2fc0d06135fad1f087 --- /dev/null +++ b/sft/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0115438842f045d17d7939469a324630572a2f124e870cd6a207f2a0fe6765b +size 1064 diff --git a/sft/checkpoint-5000/special_tokens_map.json b/sft/checkpoint-5000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-5000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-5000/tokenizer.json b/sft/checkpoint-5000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-5000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-5000/tokenizer_config.json b/sft/checkpoint-5000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-5000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-5000/trainer_state.json b/sft/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d49fa6b600dac35da9b0e751d626c65cddb4debb --- /dev/null +++ b/sft/checkpoint-5000/trainer_state.json @@ -0,0 +1,734 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0782918149466192, + "eval_steps": 500, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.237807547251268e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-5000/training_args.bin b/sft/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-5500/README.md b/sft/checkpoint-5500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-5500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-5500/adapter_config.json b/sft/checkpoint-5500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-5500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-5500/adapter_model.safetensors b/sft/checkpoint-5500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..186a531ddda24e86aa2b7ebea04c81844f6d54ab --- /dev/null +++ b/sft/checkpoint-5500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a954d4aa980c16ef70893403b8bfbfe0e639fecba8b096141ce4fc7770671e54 +size 335604696 diff --git a/sft/checkpoint-5500/optimizer.pt b/sft/checkpoint-5500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a46263b2055f0cafebc6bc2e3bc82452e73b107d --- /dev/null +++ b/sft/checkpoint-5500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:763033a9613eb37c5bfe6c96c4f53f8c4f45226cffcd5ab6a75a1ef5dd9e1825 +size 671466706 diff --git a/sft/checkpoint-5500/rng_state_0.pth b/sft/checkpoint-5500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..0ed77a29a996cb6935a4a17373a69be86d00bb6f --- /dev/null +++ b/sft/checkpoint-5500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4a4c251f7da797a1238e3d7f349694506dd66052881f097cbff5e00f364d77 +size 14512 diff --git a/sft/checkpoint-5500/rng_state_1.pth b/sft/checkpoint-5500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..01d96ee774a84d250fb15ae4fdd7c35ffa5cdc85 --- /dev/null +++ b/sft/checkpoint-5500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f295c72504cef8c0c1e15a9c3b8893846e2d018c656c76aa1f5b464b294798b8 +size 14512 diff --git a/sft/checkpoint-5500/scaler.pt b/sft/checkpoint-5500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..69f6088312209ae4e3360aae009559b7cc312734 --- /dev/null +++ b/sft/checkpoint-5500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165682a2e7bf7bc72fdb6bc7e86132478cb636e61812d7d9702660fa5fc8190f +size 988 diff --git a/sft/checkpoint-5500/scheduler.pt b/sft/checkpoint-5500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5f0d15db3115281b926803a20ad2f2250cbc3ad --- /dev/null +++ b/sft/checkpoint-5500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00255e18d412b5e6b0146f513ff748b650205b00339bedb13a08e4def5ba0f0c +size 1064 diff --git a/sft/checkpoint-5500/special_tokens_map.json b/sft/checkpoint-5500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-5500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-5500/tokenizer.json b/sft/checkpoint-5500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-5500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-5500/tokenizer_config.json b/sft/checkpoint-5500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-5500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-5500/trainer_state.json b/sft/checkpoint-5500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bd4409f9a16c41007411799db4625752f27cd4db --- /dev/null +++ b/sft/checkpoint-5500/trainer_state.json @@ -0,0 +1,804 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1861317804378302, + "eval_steps": 500, + "global_step": 5500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.4616106823637795e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-5500/training_args.bin b/sft/checkpoint-5500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-5500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-6000/README.md b/sft/checkpoint-6000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-6000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-6000/adapter_config.json b/sft/checkpoint-6000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-6000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-6000/adapter_model.safetensors b/sft/checkpoint-6000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac7b885ba9c360ce52a760c8da0664858eab3c32 --- /dev/null +++ b/sft/checkpoint-6000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d231dd3c949c01bc486b636d981e7ebaeb3ae472c2b3349e397076f5a7ec9bf1 +size 335604696 diff --git a/sft/checkpoint-6000/optimizer.pt b/sft/checkpoint-6000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebadf1b5aa444ad2776de0181c6d269751d95d00 --- /dev/null +++ b/sft/checkpoint-6000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf22eae1de3d494de39c594e8e6699f623de2e7f4e5b1319a8f7750d4ed0547 +size 671466706 diff --git a/sft/checkpoint-6000/rng_state_0.pth b/sft/checkpoint-6000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..98d9e0fe77696131b5cedc8d8092233a03b09d83 --- /dev/null +++ b/sft/checkpoint-6000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c226a5bb2cac72fe2933035a6765bb1d01f0f96dd8811158956f5756c09cd5b +size 14512 diff --git a/sft/checkpoint-6000/rng_state_1.pth b/sft/checkpoint-6000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc6ec77b25df8c2b4667d4cd906a86b4be8e4512 --- /dev/null +++ b/sft/checkpoint-6000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:372abd171aec7b07105e21e43517a6a079f22bd6d3125ad94d31e970a6b69d62 +size 14512 diff --git a/sft/checkpoint-6000/scaler.pt b/sft/checkpoint-6000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d88292a1ae3f182fc880ae3e2f005428d40f0446 --- /dev/null +++ b/sft/checkpoint-6000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d06a1db8a8df714f7bbc3ad704772e0855fb4c28e2bab1f6e9e5c2502cbe536 +size 988 diff --git a/sft/checkpoint-6000/scheduler.pt b/sft/checkpoint-6000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..44d59f1ef301e929e1890f6cfbe2eba595f1a3f7 --- /dev/null +++ b/sft/checkpoint-6000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76024b183007b4549bbf8244476ecec8d8b7146d9ad057ec7c1317e63cd8636d +size 1064 diff --git a/sft/checkpoint-6000/special_tokens_map.json b/sft/checkpoint-6000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-6000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-6000/tokenizer.json b/sft/checkpoint-6000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-6000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-6000/tokenizer_config.json b/sft/checkpoint-6000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-6000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-6000/trainer_state.json b/sft/checkpoint-6000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0bfb3293ecdc8421ef0c63356bc14680859ff054 --- /dev/null +++ b/sft/checkpoint-6000/trainer_state.json @@ -0,0 +1,874 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2939717459290412, + "eval_steps": 500, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.685413817476291e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-6000/training_args.bin b/sft/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-6500/README.md b/sft/checkpoint-6500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-6500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-6500/adapter_config.json b/sft/checkpoint-6500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-6500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-6500/adapter_model.safetensors b/sft/checkpoint-6500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85b0ca6422072e9ee3b4edc47cbe037674602f2a --- /dev/null +++ b/sft/checkpoint-6500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0afee94747b0343e6bb3f739771869aff3dc841af139b02fc402886c1307cd +size 335604696 diff --git a/sft/checkpoint-6500/optimizer.pt b/sft/checkpoint-6500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2292339665070d7f75309f6d75191c3114a94e3 --- /dev/null +++ b/sft/checkpoint-6500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:161a2d8f9af20d3b5badd1b6bb71290b5c1a17d8dc2c724ccae6f74ef50406e8 +size 671466706 diff --git a/sft/checkpoint-6500/rng_state_0.pth b/sft/checkpoint-6500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..64290d515030ab8723334e0e7d7e3a32e1613116 --- /dev/null +++ b/sft/checkpoint-6500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6add2cbe2d6428b1ac89db25725204155066c024789e58f72a58e22041f8ec04 +size 14512 diff --git a/sft/checkpoint-6500/rng_state_1.pth b/sft/checkpoint-6500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..04437831c6e569c04701044c1bff72dc9e66484a --- /dev/null +++ b/sft/checkpoint-6500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f990bdd1daaf5e998f86d8d7d3d2237cdb7cc3e267b90b48fe9817146d3cb1 +size 14512 diff --git a/sft/checkpoint-6500/scaler.pt b/sft/checkpoint-6500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..32b0573dc4bc154e3ee0b2aa025a361616ddca01 --- /dev/null +++ b/sft/checkpoint-6500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b43e3ceec6532faa1674c05fecc679f0614b6e40ac3befed452f1397c7facc5 +size 988 diff --git a/sft/checkpoint-6500/scheduler.pt b/sft/checkpoint-6500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..18c13d64a703ea487ab66ad0d68a7e05932df708 --- /dev/null +++ b/sft/checkpoint-6500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b202251e03e96fcf1e077612b5ca87514e2f2f0ae2fd74099375412e93433487 +size 1064 diff --git a/sft/checkpoint-6500/special_tokens_map.json b/sft/checkpoint-6500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-6500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-6500/tokenizer.json b/sft/checkpoint-6500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-6500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-6500/tokenizer_config.json b/sft/checkpoint-6500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-6500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-6500/trainer_state.json b/sft/checkpoint-6500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0a8d8f1ef047876b5fbe37a4c475adaeb2fa86f8 --- /dev/null +++ b/sft/checkpoint-6500/trainer_state.json @@ -0,0 +1,944 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4018117114202524, + "eval_steps": 500, + "global_step": 6500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9092169525888025e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-6500/training_args.bin b/sft/checkpoint-6500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-6500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-7000/README.md b/sft/checkpoint-7000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-7000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-7000/adapter_config.json b/sft/checkpoint-7000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-7000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-7000/adapter_model.safetensors b/sft/checkpoint-7000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd105dfb694e5a58fb166a3d9376fcb886709dbb --- /dev/null +++ b/sft/checkpoint-7000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6892daf955e3230b333839bc98287e396b4d08109604d81f7eae960a572c2f +size 335604696 diff --git a/sft/checkpoint-7000/optimizer.pt b/sft/checkpoint-7000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..eec8c3c1098540c70e229b54eed4db9d367c9197 --- /dev/null +++ b/sft/checkpoint-7000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f841bbb15dff13153b31a81607bd0d7462862a07b1fd148aac6f51495c4b44a6 +size 671466706 diff --git a/sft/checkpoint-7000/rng_state_0.pth b/sft/checkpoint-7000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..90cee8f78e3e2b55ba86f3d566780700b7b457d2 --- /dev/null +++ b/sft/checkpoint-7000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f24f8ce8bdf9a64da42a0c3204bc01714e23b63d1ab01ccb0d892eb9c6eeddd +size 14512 diff --git a/sft/checkpoint-7000/rng_state_1.pth b/sft/checkpoint-7000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..90dab0a3425c99ab7753a36dec622774b2da318a --- /dev/null +++ b/sft/checkpoint-7000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc9e2ddbd7a9c973186db8c446942d042981d8a628dc11a3ac8b0401aff1aca0 +size 14512 diff --git a/sft/checkpoint-7000/scaler.pt b/sft/checkpoint-7000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..af134ed107113b58e1a3b68c59ca47ebbc5775af --- /dev/null +++ b/sft/checkpoint-7000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ed14731770e638c57edcebffd56d36f7d5762a3bdbb04a710325fd8552394ad +size 988 diff --git a/sft/checkpoint-7000/scheduler.pt b/sft/checkpoint-7000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b60f49b376c162a63f8b2ec6ce944f3109d2767a --- /dev/null +++ b/sft/checkpoint-7000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee56f575059bb896ee4f2516b1d83e3181ec8e20f8bee1ffa6fb265f731d79c +size 1064 diff --git a/sft/checkpoint-7000/special_tokens_map.json b/sft/checkpoint-7000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-7000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-7000/tokenizer.json b/sft/checkpoint-7000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-7000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-7000/tokenizer_config.json b/sft/checkpoint-7000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-7000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-7000/trainer_state.json b/sft/checkpoint-7000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d28b8832ac676ba4883bb6561b11f07ecadf8944 --- /dev/null +++ b/sft/checkpoint-7000/trainer_state.json @@ -0,0 +1,1014 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5096516769114634, + "eval_steps": 500, + "global_step": 7000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.133020087701314e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-7000/training_args.bin b/sft/checkpoint-7000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-7000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-7500/README.md b/sft/checkpoint-7500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-7500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-7500/adapter_config.json b/sft/checkpoint-7500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-7500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-7500/adapter_model.safetensors b/sft/checkpoint-7500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35506f405e6eb90a484ab8d01291171525c535ce --- /dev/null +++ b/sft/checkpoint-7500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a957a01e7d6eaf33b86762a683645d649997a1c1eaf9aefe96e3bdea81db74dc +size 335604696 diff --git a/sft/checkpoint-7500/optimizer.pt b/sft/checkpoint-7500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..49945e8f2f47e8bad13fb27f026f221bc2953349 --- /dev/null +++ b/sft/checkpoint-7500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3691f8cfb0a2c0e0d6c7b1b66c42e9813c9dd0480ccfbc01a8276c02af707d1f +size 671466706 diff --git a/sft/checkpoint-7500/rng_state_0.pth b/sft/checkpoint-7500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ba699530ae9c459915dfed6e1e64032d4564712 --- /dev/null +++ b/sft/checkpoint-7500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39ab57462f4d2bcaaa6dd7c654fd96d45ca2585b1c969598b79c2bcbb1d83f5e +size 14512 diff --git a/sft/checkpoint-7500/rng_state_1.pth b/sft/checkpoint-7500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7fba9287260ff5bc0f267af8c0ef57e42456999 --- /dev/null +++ b/sft/checkpoint-7500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2d0ce892cb06b92657286e31c13649f021ebc8d6107a8ad7a6c35d821ff360 +size 14512 diff --git a/sft/checkpoint-7500/scaler.pt b/sft/checkpoint-7500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f2ebe8227e733e0c96c514240d298434eadf9b8 --- /dev/null +++ b/sft/checkpoint-7500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eabcda2891f6f2cf84a9603d15511efe9fed7aa47c0dfc1dbea258550d5a8310 +size 988 diff --git a/sft/checkpoint-7500/scheduler.pt b/sft/checkpoint-7500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b7c0193115cc0a4595eea901871a641fed23b78 --- /dev/null +++ b/sft/checkpoint-7500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db850b7ecf21db1950026ab4f0be13ac967235fd05397e5fcd31648892983cb6 +size 1064 diff --git a/sft/checkpoint-7500/special_tokens_map.json b/sft/checkpoint-7500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-7500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-7500/tokenizer.json b/sft/checkpoint-7500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-7500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-7500/tokenizer_config.json b/sft/checkpoint-7500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-7500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-7500/trainer_state.json b/sft/checkpoint-7500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2bb68ecfdd29112b6301e1b2b9bf1daa5952204b --- /dev/null +++ b/sft/checkpoint-7500/trainer_state.json @@ -0,0 +1,1084 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6174916424026744, + "eval_steps": 500, + "global_step": 7500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.3568232228138254e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-7500/training_args.bin b/sft/checkpoint-7500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-7500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-8000/README.md b/sft/checkpoint-8000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-8000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-8000/adapter_config.json b/sft/checkpoint-8000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-8000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-8000/adapter_model.safetensors b/sft/checkpoint-8000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9aec6b10c585cfc5a91a77eb9b606c1d2141576c --- /dev/null +++ b/sft/checkpoint-8000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:984e3e45eb21e0a93378dde19cb90bab0afdb0482b96ebce6546fd9b88a194b2 +size 335604696 diff --git a/sft/checkpoint-8000/optimizer.pt b/sft/checkpoint-8000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf3b6acca20fa35a50406beee8c5f04822e02788 --- /dev/null +++ b/sft/checkpoint-8000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3d1fa85fb889b36767d6c4add7f72281fda888f0b0c9a3e0660af2c09f5030 +size 671466706 diff --git a/sft/checkpoint-8000/rng_state_0.pth b/sft/checkpoint-8000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ddb5c0344d275517ea75405631336f32f314b07 --- /dev/null +++ b/sft/checkpoint-8000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc89c340f3f37b7b69f1a24e8dc869f140501a676ede691a3bac76da93ce61c1 +size 14512 diff --git a/sft/checkpoint-8000/rng_state_1.pth b/sft/checkpoint-8000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c81b3753d0468fdbddc14ea8582ed2bb44d56fd --- /dev/null +++ b/sft/checkpoint-8000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb4cd26adbf859a34b02eff09e2c9af1309ec279912b5ae30b57f7c78299d6c +size 14512 diff --git a/sft/checkpoint-8000/scaler.pt b/sft/checkpoint-8000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36f2051cc6714288ec556e4a92becc3d96aa9f46 --- /dev/null +++ b/sft/checkpoint-8000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d48ef48e7f19b63754d76fe78c9e350ff8eab3ca021743d5cf13b1d544c1103f +size 988 diff --git a/sft/checkpoint-8000/scheduler.pt b/sft/checkpoint-8000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..757ba49f90922520da63332e76f0b4630b06cbd5 --- /dev/null +++ b/sft/checkpoint-8000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69db3c208e1f6d816ae60447e9d56f0476a0074d4a8e27e0937490931f580232 +size 1064 diff --git a/sft/checkpoint-8000/special_tokens_map.json b/sft/checkpoint-8000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-8000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-8000/tokenizer.json b/sft/checkpoint-8000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-8000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-8000/tokenizer_config.json b/sft/checkpoint-8000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-8000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-8000/trainer_state.json b/sft/checkpoint-8000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c7cd9fdd32744dd0fe7d1e989c7795b2b39e06 --- /dev/null +++ b/sft/checkpoint-8000/trainer_state.json @@ -0,0 +1,1154 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7253316078938856, + "eval_steps": 500, + "global_step": 8000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.580626357926337e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-8000/training_args.bin b/sft/checkpoint-8000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-8000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-8500/README.md b/sft/checkpoint-8500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-8500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-8500/adapter_config.json b/sft/checkpoint-8500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-8500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-8500/adapter_model.safetensors b/sft/checkpoint-8500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe65c866245d0e76d32d529697f216ad5b58225b --- /dev/null +++ b/sft/checkpoint-8500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9536bb45be1d8c085640c54ef22abcf66139a57a10a7550b3de7f64973f4205a +size 335604696 diff --git a/sft/checkpoint-8500/optimizer.pt b/sft/checkpoint-8500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fb16399fa81bd382b349e215e87bcbaa53a214d --- /dev/null +++ b/sft/checkpoint-8500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb6ba912365456b4a1ed5e33b6379f669b0f06ab61ac794237edc671df433ca +size 671466706 diff --git a/sft/checkpoint-8500/rng_state_0.pth b/sft/checkpoint-8500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..38f7f8aa1181899c6fde0149d20d0ca02b13dcec --- /dev/null +++ b/sft/checkpoint-8500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4aca561e1a68e18cde37285b3d4cdafcf024a7b9ee0bf77f1df8f81414c016d +size 14512 diff --git a/sft/checkpoint-8500/rng_state_1.pth b/sft/checkpoint-8500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..ec04d43495d4bc3550cb14b2c0da67af0072cf21 --- /dev/null +++ b/sft/checkpoint-8500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ad1afce29ef47526b7106632c4e013b3f0aac94bfa507ec9d4ad479ab73db5 +size 14512 diff --git a/sft/checkpoint-8500/scaler.pt b/sft/checkpoint-8500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..430e1c4ab925858e6be65046a8fd44e30eb6293e --- /dev/null +++ b/sft/checkpoint-8500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:360003f823a40049daa0275f08aa5c50eeb500508105c28460680ca3ab204cf9 +size 988 diff --git a/sft/checkpoint-8500/scheduler.pt b/sft/checkpoint-8500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ec276398400e137f1e1693db9c3882d24f2382a --- /dev/null +++ b/sft/checkpoint-8500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed90d71992bb0c6a7ab985e8e0c9fe87d246413adb296b87de0fe2296039ddc8 +size 1064 diff --git a/sft/checkpoint-8500/special_tokens_map.json b/sft/checkpoint-8500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-8500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-8500/tokenizer.json b/sft/checkpoint-8500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-8500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-8500/tokenizer_config.json b/sft/checkpoint-8500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-8500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-8500/trainer_state.json b/sft/checkpoint-8500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8aa3d434b9fabfae63234f3224c527b580dd4e17 --- /dev/null +++ b/sft/checkpoint-8500/trainer_state.json @@ -0,0 +1,1224 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.8331715733850964, + "eval_steps": 500, + "global_step": 8500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.804429493038848e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-8500/training_args.bin b/sft/checkpoint-8500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-8500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-9000/README.md b/sft/checkpoint-9000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-9000/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-9000/adapter_config.json b/sft/checkpoint-9000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-9000/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-9000/adapter_model.safetensors b/sft/checkpoint-9000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7010c33bd0b5f0676a5b96af6812d7f85f95d6e --- /dev/null +++ b/sft/checkpoint-9000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3f2a987328e302fafb0170807eb643b66d753f1295b43a6af291b7b5a8b47a +size 335604696 diff --git a/sft/checkpoint-9000/optimizer.pt b/sft/checkpoint-9000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdff62c345f513f295d8e94a246c5ed128ec2982 --- /dev/null +++ b/sft/checkpoint-9000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad5366f37b11b76a2b8d7775b2d3b3eb5572d12ff9ad19e5cdbf196c36271271 +size 671466706 diff --git a/sft/checkpoint-9000/rng_state_0.pth b/sft/checkpoint-9000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..9766f45df3a74867cc363e94002b595135330800 --- /dev/null +++ b/sft/checkpoint-9000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057b34b5b5eda4d50e41eddb0de6d0e5cd5cb582cb69ff77db9d49a24471df85 +size 14512 diff --git a/sft/checkpoint-9000/rng_state_1.pth b/sft/checkpoint-9000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..511e7d2b7f0f373b367f9915935fa27911bfa5f1 --- /dev/null +++ b/sft/checkpoint-9000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41bdb548c213b4ddd32c8ec4e96c63d95950246b61e054468efb9f77d2c4ecf2 +size 14512 diff --git a/sft/checkpoint-9000/scaler.pt b/sft/checkpoint-9000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..be78cf37947b33335dd0088db033bce3a810aaff --- /dev/null +++ b/sft/checkpoint-9000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ded2dd935ae416f16f5ec0cf031b9e068c03e605188b78c596c75bea9a439f +size 988 diff --git a/sft/checkpoint-9000/scheduler.pt b/sft/checkpoint-9000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c3ff19cc3996c2d31f3c9b4ef83bc6c53b8d860 --- /dev/null +++ b/sft/checkpoint-9000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c83f3252e0e1fd26333c88c9e90e2360a4f807e03115ea2b8ecceba13ba555b0 +size 1064 diff --git a/sft/checkpoint-9000/special_tokens_map.json b/sft/checkpoint-9000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-9000/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-9000/tokenizer.json b/sft/checkpoint-9000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-9000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-9000/tokenizer_config.json b/sft/checkpoint-9000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-9000/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-9000/trainer_state.json b/sft/checkpoint-9000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e7cd21c9754d465c09eb3e8f9b949b85d50a3a69 --- /dev/null +++ b/sft/checkpoint-9000/trainer_state.json @@ -0,0 +1,1294 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9410115388763076, + "eval_steps": 500, + "global_step": 9000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.02823262815136e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-9000/training_args.bin b/sft/checkpoint-9000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-9000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/checkpoint-9500/README.md b/sft/checkpoint-9500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..509c4a7507b81c6031600af47780548d02aa948d --- /dev/null +++ b/sft/checkpoint-9500/README.md @@ -0,0 +1,207 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Meta-Llama-3-8B +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.16.0 \ No newline at end of file diff --git a/sft/checkpoint-9500/adapter_config.json b/sft/checkpoint-9500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac0a21bcd0c7f3639ace0416715e8867f29642 --- /dev/null +++ b/sft/checkpoint-9500/adapter_config.json @@ -0,0 +1,41 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "o_proj", + "down_proj", + "up_proj", + "k_proj", + "gate_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/sft/checkpoint-9500/adapter_model.safetensors b/sft/checkpoint-9500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84ecdeb3dfbd05ab801374b192b2665153810e1b --- /dev/null +++ b/sft/checkpoint-9500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49b71d94e5365f69adf7117f0db5021b1a7309e5ff78f17377c36580e866079 +size 335604696 diff --git a/sft/checkpoint-9500/optimizer.pt b/sft/checkpoint-9500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..63ae983f2223004bb2fa5a6e4da5819494233958 --- /dev/null +++ b/sft/checkpoint-9500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2ffa2e339b77377e992d2f15db8a6e7c8c96b336924bc819b26926e673950f4 +size 671466706 diff --git a/sft/checkpoint-9500/rng_state_0.pth b/sft/checkpoint-9500/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2efd85404673094f5517b1e85454eef3de6ea3b --- /dev/null +++ b/sft/checkpoint-9500/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d207584a5b16017183a26b28cb45bea9b90d7a6432fb2afc0e057f7bbf6606e2 +size 14512 diff --git a/sft/checkpoint-9500/rng_state_1.pth b/sft/checkpoint-9500/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..dddd44c8f02535e817fe38627263cc24da59876c --- /dev/null +++ b/sft/checkpoint-9500/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0637c16c83437a7eeff7cb43a201723e4afd71347a237807f7af280d407df6c1 +size 14512 diff --git a/sft/checkpoint-9500/scaler.pt b/sft/checkpoint-9500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5eedf0bd6625cbcf121599d76a73729dc8962c07 --- /dev/null +++ b/sft/checkpoint-9500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e979c447d0ab4d7c4e87fe375abb572391b980b029d423a19c8846b75173272 +size 988 diff --git a/sft/checkpoint-9500/scheduler.pt b/sft/checkpoint-9500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea86ecd84d9fbc115766379967ea428007cd250d --- /dev/null +++ b/sft/checkpoint-9500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:693ca3dc2a342cb457e6ebf221b06b16a3717f1d20be39b3387734d742fd5172 +size 1064 diff --git a/sft/checkpoint-9500/special_tokens_map.json b/sft/checkpoint-9500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/checkpoint-9500/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/checkpoint-9500/tokenizer.json b/sft/checkpoint-9500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/checkpoint-9500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/checkpoint-9500/tokenizer_config.json b/sft/checkpoint-9500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/checkpoint-9500/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/sft/checkpoint-9500/trainer_state.json b/sft/checkpoint-9500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e4c75aaedda4b9a0f1fe51e307ea6d5ec1c7d51f --- /dev/null +++ b/sft/checkpoint-9500/trainer_state.json @@ -0,0 +1,1364 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0487436644020276, + "eval_steps": 500, + "global_step": 9500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010783996549121105, + "grad_norm": 0.2076384574174881, + "learning_rate": 5.861244019138756e-06, + "loss": 1.0803, + "step": 50 + }, + { + "epoch": 0.02156799309824221, + "grad_norm": 0.25434058904647827, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0521, + "step": 100 + }, + { + "epoch": 0.03235198964736331, + "grad_norm": 0.24684220552444458, + "learning_rate": 1.7822966507177032e-05, + "loss": 1.0288, + "step": 150 + }, + { + "epoch": 0.04313598619648442, + "grad_norm": 0.3034498691558838, + "learning_rate": 2.380382775119617e-05, + "loss": 0.9972, + "step": 200 + }, + { + "epoch": 0.05391998274560552, + "grad_norm": 0.2725016176700592, + "learning_rate": 2.9784688995215314e-05, + "loss": 0.9555, + "step": 250 + }, + { + "epoch": 0.06470397929472663, + "grad_norm": 0.27356916666030884, + "learning_rate": 3.576555023923445e-05, + "loss": 0.9688, + "step": 300 + }, + { + "epoch": 0.07548797584384773, + "grad_norm": 0.2624454200267792, + "learning_rate": 4.174641148325359e-05, + "loss": 0.9699, + "step": 350 + }, + { + "epoch": 0.08627197239296884, + "grad_norm": 0.2676330506801605, + "learning_rate": 4.772727272727273e-05, + "loss": 0.9739, + "step": 400 + }, + { + "epoch": 0.09705596894208994, + "grad_norm": 0.24369767308235168, + "learning_rate": 4.999934880025785e-05, + "loss": 0.9833, + "step": 450 + }, + { + "epoch": 0.10783996549121104, + "grad_norm": 0.26960158348083496, + "learning_rate": 4.9995554200393156e-05, + "loss": 0.9677, + "step": 500 + }, + { + "epoch": 0.11862396204033215, + "grad_norm": 0.2564559578895569, + "learning_rate": 4.998837209058379e-05, + "loss": 0.9493, + "step": 550 + }, + { + "epoch": 0.12940795858945325, + "grad_norm": 0.23627087473869324, + "learning_rate": 4.9977803444181587e-05, + "loss": 0.9726, + "step": 600 + }, + { + "epoch": 0.14019195513857435, + "grad_norm": 0.22857290506362915, + "learning_rate": 4.996384969349704e-05, + "loss": 0.9653, + "step": 650 + }, + { + "epoch": 0.15097595168769545, + "grad_norm": 0.25175178050994873, + "learning_rate": 4.9946512729605226e-05, + "loss": 0.9725, + "step": 700 + }, + { + "epoch": 0.16175994823681655, + "grad_norm": 0.20284195244312286, + "learning_rate": 4.992579490208947e-05, + "loss": 0.968, + "step": 750 + }, + { + "epoch": 0.17254394478593768, + "grad_norm": 0.228809654712677, + "learning_rate": 4.990169901872295e-05, + "loss": 0.9338, + "step": 800 + }, + { + "epoch": 0.18332794133505878, + "grad_norm": 0.2436237633228302, + "learning_rate": 4.987422834508818e-05, + "loss": 0.9581, + "step": 850 + }, + { + "epoch": 0.19411193788417988, + "grad_norm": 0.2001142054796219, + "learning_rate": 4.9843386604134425e-05, + "loss": 0.9512, + "step": 900 + }, + { + "epoch": 0.20489593443330098, + "grad_norm": 0.20406965911388397, + "learning_rate": 4.980917797567315e-05, + "loss": 0.9479, + "step": 950 + }, + { + "epoch": 0.21567993098242208, + "grad_norm": 0.20756883919239044, + "learning_rate": 4.9771607095811565e-05, + "loss": 0.9552, + "step": 1000 + }, + { + "epoch": 0.22646392753154318, + "grad_norm": 0.23893098533153534, + "learning_rate": 4.9730679056324334e-05, + "loss": 0.9732, + "step": 1050 + }, + { + "epoch": 0.2372479240806643, + "grad_norm": 0.20374947786331177, + "learning_rate": 4.968639940396346e-05, + "loss": 0.961, + "step": 1100 + }, + { + "epoch": 0.2480319206297854, + "grad_norm": 0.20845109224319458, + "learning_rate": 4.963877413970663e-05, + "loss": 0.9481, + "step": 1150 + }, + { + "epoch": 0.2588159171789065, + "grad_norm": 0.23683245480060577, + "learning_rate": 4.958780971794388e-05, + "loss": 0.9558, + "step": 1200 + }, + { + "epoch": 0.2695999137280276, + "grad_norm": 0.18015944957733154, + "learning_rate": 4.953351304560292e-05, + "loss": 0.9367, + "step": 1250 + }, + { + "epoch": 0.2803839102771487, + "grad_norm": 0.21432434022426605, + "learning_rate": 4.947589148121301e-05, + "loss": 0.9289, + "step": 1300 + }, + { + "epoch": 0.2911679068262698, + "grad_norm": 0.217897430062294, + "learning_rate": 4.941495283390778e-05, + "loss": 0.9663, + "step": 1350 + }, + { + "epoch": 0.3019519033753909, + "grad_norm": 0.23911495506763458, + "learning_rate": 4.9350705362366836e-05, + "loss": 0.9534, + "step": 1400 + }, + { + "epoch": 0.312735899924512, + "grad_norm": 0.21729810535907745, + "learning_rate": 4.928315777369652e-05, + "loss": 0.9663, + "step": 1450 + }, + { + "epoch": 0.3235198964736331, + "grad_norm": 0.19448955357074738, + "learning_rate": 4.9212319222249914e-05, + "loss": 0.9203, + "step": 1500 + }, + { + "epoch": 0.3343038930227542, + "grad_norm": 0.20799997448921204, + "learning_rate": 4.913819930838616e-05, + "loss": 0.9426, + "step": 1550 + }, + { + "epoch": 0.34508788957187536, + "grad_norm": 0.1989525556564331, + "learning_rate": 4.906080807716941e-05, + "loss": 0.9544, + "step": 1600 + }, + { + "epoch": 0.35587188612099646, + "grad_norm": 0.21680687367916107, + "learning_rate": 4.898015601700745e-05, + "loss": 0.9666, + "step": 1650 + }, + { + "epoch": 0.36665588267011756, + "grad_norm": 0.2180759161710739, + "learning_rate": 4.889625405823027e-05, + "loss": 0.9441, + "step": 1700 + }, + { + "epoch": 0.37743987921923866, + "grad_norm": 0.19334350526332855, + "learning_rate": 4.880911357160877e-05, + "loss": 0.9415, + "step": 1750 + }, + { + "epoch": 0.38822387576835976, + "grad_norm": 0.19350044429302216, + "learning_rate": 4.871874636681366e-05, + "loss": 0.9534, + "step": 1800 + }, + { + "epoch": 0.39900787231748086, + "grad_norm": 0.23279784619808197, + "learning_rate": 4.862516469081505e-05, + "loss": 0.9578, + "step": 1850 + }, + { + "epoch": 0.40979186886660196, + "grad_norm": 0.2038542479276657, + "learning_rate": 4.852838122622264e-05, + "loss": 0.9416, + "step": 1900 + }, + { + "epoch": 0.42057586541572306, + "grad_norm": 0.21980704367160797, + "learning_rate": 4.842840908956692e-05, + "loss": 0.9359, + "step": 1950 + }, + { + "epoch": 0.43135986196484416, + "grad_norm": 0.20842380821704865, + "learning_rate": 4.832526182952156e-05, + "loss": 0.9495, + "step": 2000 + }, + { + "epoch": 0.44214385851396526, + "grad_norm": 0.2161971479654312, + "learning_rate": 4.821895342506724e-05, + "loss": 0.9388, + "step": 2050 + }, + { + "epoch": 0.45292785506308636, + "grad_norm": 0.2119661122560501, + "learning_rate": 4.8109498283597146e-05, + "loss": 0.9618, + "step": 2100 + }, + { + "epoch": 0.46371185161220746, + "grad_norm": 0.17877915501594543, + "learning_rate": 4.799691123896441e-05, + "loss": 0.9498, + "step": 2150 + }, + { + "epoch": 0.4744958481613286, + "grad_norm": 0.2198779135942459, + "learning_rate": 4.788120754947179e-05, + "loss": 0.9464, + "step": 2200 + }, + { + "epoch": 0.4852798447104497, + "grad_norm": 0.20385344326496124, + "learning_rate": 4.7762402895803763e-05, + "loss": 0.9423, + "step": 2250 + }, + { + "epoch": 0.4960638412595708, + "grad_norm": 0.21472816169261932, + "learning_rate": 4.764051337890143e-05, + "loss": 0.9295, + "step": 2300 + }, + { + "epoch": 0.5068478378086919, + "grad_norm": 0.21423693001270294, + "learning_rate": 4.7515555517780405e-05, + "loss": 0.9557, + "step": 2350 + }, + { + "epoch": 0.517631834357813, + "grad_norm": 0.2088768184185028, + "learning_rate": 4.7387546247292156e-05, + "loss": 0.9392, + "step": 2400 + }, + { + "epoch": 0.5284158309069341, + "grad_norm": 0.18323567509651184, + "learning_rate": 4.725650291582885e-05, + "loss": 0.9418, + "step": 2450 + }, + { + "epoch": 0.5391998274560552, + "grad_norm": 0.22341737151145935, + "learning_rate": 4.712244328297224e-05, + "loss": 0.9207, + "step": 2500 + }, + { + "epoch": 0.5499838240051763, + "grad_norm": 0.2024504542350769, + "learning_rate": 4.698538551708682e-05, + "loss": 0.9337, + "step": 2550 + }, + { + "epoch": 0.5607678205542974, + "grad_norm": 0.20455148816108704, + "learning_rate": 4.684534819285758e-05, + "loss": 0.9451, + "step": 2600 + }, + { + "epoch": 0.5715518171034185, + "grad_norm": 0.19093358516693115, + "learning_rate": 4.6702350288772626e-05, + "loss": 0.9468, + "step": 2650 + }, + { + "epoch": 0.5823358136525396, + "grad_norm": 0.1995963305234909, + "learning_rate": 4.6556411184551176e-05, + "loss": 0.9373, + "step": 2700 + }, + { + "epoch": 0.5931198102016607, + "grad_norm": 0.19664354622364044, + "learning_rate": 4.640755065851712e-05, + "loss": 0.9609, + "step": 2750 + }, + { + "epoch": 0.6039038067507818, + "grad_norm": 0.20155999064445496, + "learning_rate": 4.6255788884918595e-05, + "loss": 0.9221, + "step": 2800 + }, + { + "epoch": 0.6146878032999029, + "grad_norm": 0.2094108611345291, + "learning_rate": 4.610114643119382e-05, + "loss": 0.9665, + "step": 2850 + }, + { + "epoch": 0.625471799849024, + "grad_norm": 0.23038670420646667, + "learning_rate": 4.5943644255183785e-05, + "loss": 0.9223, + "step": 2900 + }, + { + "epoch": 0.6362557963981451, + "grad_norm": 0.22103433310985565, + "learning_rate": 4.5783303702291856e-05, + "loss": 0.9271, + "step": 2950 + }, + { + "epoch": 0.6470397929472662, + "grad_norm": 0.21444232761859894, + "learning_rate": 4.5620146502591065e-05, + "loss": 0.9553, + "step": 3000 + }, + { + "epoch": 0.6578237894963873, + "grad_norm": 0.20402322709560394, + "learning_rate": 4.5454194767879046e-05, + "loss": 0.9342, + "step": 3050 + }, + { + "epoch": 0.6686077860455084, + "grad_norm": 0.17598140239715576, + "learning_rate": 4.52854709886814e-05, + "loss": 0.9343, + "step": 3100 + }, + { + "epoch": 0.6793917825946296, + "grad_norm": 0.2235531210899353, + "learning_rate": 4.511399803120367e-05, + "loss": 0.9325, + "step": 3150 + }, + { + "epoch": 0.6901757791437507, + "grad_norm": 0.1978316605091095, + "learning_rate": 4.49397991342324e-05, + "loss": 0.9175, + "step": 3200 + }, + { + "epoch": 0.7009597756928718, + "grad_norm": 0.20724375545978546, + "learning_rate": 4.476289790598571e-05, + "loss": 0.9509, + "step": 3250 + }, + { + "epoch": 0.7117437722419929, + "grad_norm": 0.19276615977287292, + "learning_rate": 4.458331832091385e-05, + "loss": 0.9247, + "step": 3300 + }, + { + "epoch": 0.722527768791114, + "grad_norm": 0.2208387851715088, + "learning_rate": 4.440108471644997e-05, + "loss": 0.9409, + "step": 3350 + }, + { + "epoch": 0.7333117653402351, + "grad_norm": 0.21308571100234985, + "learning_rate": 4.421622178971193e-05, + "loss": 0.9267, + "step": 3400 + }, + { + "epoch": 0.7440957618893562, + "grad_norm": 0.2115100473165512, + "learning_rate": 4.4028754594155125e-05, + "loss": 0.933, + "step": 3450 + }, + { + "epoch": 0.7548797584384773, + "grad_norm": 0.21246980130672455, + "learning_rate": 4.383870853617721e-05, + "loss": 0.9422, + "step": 3500 + }, + { + "epoch": 0.7656637549875984, + "grad_norm": 0.2082446962594986, + "learning_rate": 4.364610937167485e-05, + "loss": 0.9204, + "step": 3550 + }, + { + "epoch": 0.7764477515367195, + "grad_norm": 0.22102369368076324, + "learning_rate": 4.345098320255321e-05, + "loss": 0.9226, + "step": 3600 + }, + { + "epoch": 0.7872317480858406, + "grad_norm": 0.19831791520118713, + "learning_rate": 4.325335647318848e-05, + "loss": 0.9327, + "step": 3650 + }, + { + "epoch": 0.7980157446349617, + "grad_norm": 0.2220238745212555, + "learning_rate": 4.3053255966844016e-05, + "loss": 0.9318, + "step": 3700 + }, + { + "epoch": 0.8087997411840828, + "grad_norm": 0.20910035073757172, + "learning_rate": 4.285070880204057e-05, + "loss": 0.9306, + "step": 3750 + }, + { + "epoch": 0.8195837377332039, + "grad_norm": 0.21745839715003967, + "learning_rate": 4.264574242888105e-05, + "loss": 0.9304, + "step": 3800 + }, + { + "epoch": 0.830367734282325, + "grad_norm": 0.24437028169631958, + "learning_rate": 4.2438384625330374e-05, + "loss": 0.9433, + "step": 3850 + }, + { + "epoch": 0.8411517308314461, + "grad_norm": 0.2319614738225937, + "learning_rate": 4.222866349345083e-05, + "loss": 0.9536, + "step": 3900 + }, + { + "epoch": 0.8519357273805672, + "grad_norm": 0.2375030517578125, + "learning_rate": 4.2016607455593624e-05, + "loss": 0.9421, + "step": 3950 + }, + { + "epoch": 0.8627197239296883, + "grad_norm": 0.2176317423582077, + "learning_rate": 4.1802245250546926e-05, + "loss": 0.9268, + "step": 4000 + }, + { + "epoch": 0.8735037204788094, + "grad_norm": 0.2226661890745163, + "learning_rate": 4.158560592964104e-05, + "loss": 0.925, + "step": 4050 + }, + { + "epoch": 0.8842877170279305, + "grad_norm": 0.2202196568250656, + "learning_rate": 4.136671885281124e-05, + "loss": 0.9465, + "step": 4100 + }, + { + "epoch": 0.8950717135770516, + "grad_norm": 0.20654049515724182, + "learning_rate": 4.114561368461884e-05, + "loss": 0.9251, + "step": 4150 + }, + { + "epoch": 0.9058557101261727, + "grad_norm": 0.23357035219669342, + "learning_rate": 4.092232039023084e-05, + "loss": 0.9417, + "step": 4200 + }, + { + "epoch": 0.9166397066752938, + "grad_norm": 0.20816297829151154, + "learning_rate": 4.069686923135896e-05, + "loss": 0.9225, + "step": 4250 + }, + { + "epoch": 0.9274237032244149, + "grad_norm": 0.20184196531772614, + "learning_rate": 4.04692907621584e-05, + "loss": 0.9212, + "step": 4300 + }, + { + "epoch": 0.938207699773536, + "grad_norm": 0.1984609067440033, + "learning_rate": 4.023961582508704e-05, + "loss": 0.9261, + "step": 4350 + }, + { + "epoch": 0.9489916963226572, + "grad_norm": 0.22444488108158112, + "learning_rate": 4.000787554672553e-05, + "loss": 0.9291, + "step": 4400 + }, + { + "epoch": 0.9597756928717783, + "grad_norm": 0.21115441620349884, + "learning_rate": 3.977410133355884e-05, + "loss": 0.9349, + "step": 4450 + }, + { + "epoch": 0.9705596894208994, + "grad_norm": 0.19569146633148193, + "learning_rate": 3.953832486771996e-05, + "loss": 0.9049, + "step": 4500 + }, + { + "epoch": 0.9813436859700205, + "grad_norm": 0.22996151447296143, + "learning_rate": 3.930057810269612e-05, + "loss": 0.894, + "step": 4550 + }, + { + "epoch": 0.9921276825191416, + "grad_norm": 0.19879557192325592, + "learning_rate": 3.906089325899841e-05, + "loss": 0.955, + "step": 4600 + }, + { + "epoch": 1.0028038391027714, + "grad_norm": 0.207550510764122, + "learning_rate": 3.8819302819795046e-05, + "loss": 0.9362, + "step": 4650 + }, + { + "epoch": 1.0135878356518926, + "grad_norm": 0.20435990393161774, + "learning_rate": 3.8575839526509105e-05, + "loss": 0.9217, + "step": 4700 + }, + { + "epoch": 1.0243718322010138, + "grad_norm": 0.22362500429153442, + "learning_rate": 3.833053637438128e-05, + "loss": 0.9342, + "step": 4750 + }, + { + "epoch": 1.0351558287501348, + "grad_norm": 0.18318387866020203, + "learning_rate": 3.8083426607998216e-05, + "loss": 0.8937, + "step": 4800 + }, + { + "epoch": 1.045939825299256, + "grad_norm": 0.20834890007972717, + "learning_rate": 3.783454371678705e-05, + "loss": 0.9103, + "step": 4850 + }, + { + "epoch": 1.056723821848377, + "grad_norm": 0.2138434648513794, + "learning_rate": 3.758392143047677e-05, + "loss": 0.9003, + "step": 4900 + }, + { + "epoch": 1.0675078183974982, + "grad_norm": 0.21266281604766846, + "learning_rate": 3.733159371452701e-05, + "loss": 0.9142, + "step": 4950 + }, + { + "epoch": 1.0782918149466192, + "grad_norm": 0.25879135727882385, + "learning_rate": 3.707759476552489e-05, + "loss": 0.8976, + "step": 5000 + }, + { + "epoch": 1.0890758114957404, + "grad_norm": 0.2042112946510315, + "learning_rate": 3.682195900655057e-05, + "loss": 0.9092, + "step": 5050 + }, + { + "epoch": 1.0998598080448614, + "grad_norm": 0.25018027424812317, + "learning_rate": 3.656472108251205e-05, + "loss": 0.8843, + "step": 5100 + }, + { + "epoch": 1.1106438045939826, + "grad_norm": 0.2371663898229599, + "learning_rate": 3.630591585544995e-05, + "loss": 0.8764, + "step": 5150 + }, + { + "epoch": 1.1214278011431036, + "grad_norm": 0.23503442108631134, + "learning_rate": 3.604557839981284e-05, + "loss": 0.9091, + "step": 5200 + }, + { + "epoch": 1.1322117976922248, + "grad_norm": 0.24042187631130219, + "learning_rate": 3.5783743997703824e-05, + "loss": 0.9206, + "step": 5250 + }, + { + "epoch": 1.1429957942413458, + "grad_norm": 0.25456419587135315, + "learning_rate": 3.5520448134098886e-05, + "loss": 0.8784, + "step": 5300 + }, + { + "epoch": 1.153779790790467, + "grad_norm": 0.23184941709041595, + "learning_rate": 3.5255726492037854e-05, + "loss": 0.8798, + "step": 5350 + }, + { + "epoch": 1.164563787339588, + "grad_norm": 0.24035029113292694, + "learning_rate": 3.498961494778851e-05, + "loss": 0.9039, + "step": 5400 + }, + { + "epoch": 1.1753477838887092, + "grad_norm": 0.24733129143714905, + "learning_rate": 3.4722149565984385e-05, + "loss": 0.9094, + "step": 5450 + }, + { + "epoch": 1.1861317804378302, + "grad_norm": 0.25908830761909485, + "learning_rate": 3.445336659473718e-05, + "loss": 0.9167, + "step": 5500 + }, + { + "epoch": 1.1969157769869514, + "grad_norm": 0.24497312307357788, + "learning_rate": 3.4183302460724246e-05, + "loss": 0.8919, + "step": 5550 + }, + { + "epoch": 1.2076997735360724, + "grad_norm": 0.24705035984516144, + "learning_rate": 3.391199376425188e-05, + "loss": 0.9018, + "step": 5600 + }, + { + "epoch": 1.2184837700851936, + "grad_norm": 0.2370757907629013, + "learning_rate": 3.363947727429507e-05, + "loss": 0.8925, + "step": 5650 + }, + { + "epoch": 1.2292677666343146, + "grad_norm": 0.24430540204048157, + "learning_rate": 3.336578992351442e-05, + "loss": 0.8834, + "step": 5700 + }, + { + "epoch": 1.2400517631834358, + "grad_norm": 0.20415450632572174, + "learning_rate": 3.3090968803250856e-05, + "loss": 0.9195, + "step": 5750 + }, + { + "epoch": 1.2508357597325568, + "grad_norm": 0.24224655330181122, + "learning_rate": 3.281505115849885e-05, + "loss": 0.8963, + "step": 5800 + }, + { + "epoch": 1.261619756281678, + "grad_norm": 0.263614684343338, + "learning_rate": 3.253807438285879e-05, + "loss": 0.9081, + "step": 5850 + }, + { + "epoch": 1.2724037528307992, + "grad_norm": 0.22934329509735107, + "learning_rate": 3.226007601346927e-05, + "loss": 0.8957, + "step": 5900 + }, + { + "epoch": 1.2831877493799202, + "grad_norm": 0.2595406770706177, + "learning_rate": 3.198109372591984e-05, + "loss": 0.8798, + "step": 5950 + }, + { + "epoch": 1.2939717459290412, + "grad_norm": 0.2610589861869812, + "learning_rate": 3.170677292377989e-05, + "loss": 0.9074, + "step": 6000 + }, + { + "epoch": 1.3047557424781624, + "grad_norm": 0.27022746205329895, + "learning_rate": 3.142595414578805e-05, + "loss": 0.9059, + "step": 6050 + }, + { + "epoch": 1.3155397390272836, + "grad_norm": 0.21983672678470612, + "learning_rate": 3.114426449358401e-05, + "loss": 0.9179, + "step": 6100 + }, + { + "epoch": 1.3263237355764046, + "grad_norm": 0.22227706015110016, + "learning_rate": 3.086174214301658e-05, + "loss": 0.8916, + "step": 6150 + }, + { + "epoch": 1.3371077321255256, + "grad_norm": 0.2406383454799652, + "learning_rate": 3.05784253827856e-05, + "loss": 0.8994, + "step": 6200 + }, + { + "epoch": 1.3478917286746468, + "grad_norm": 0.23662422597408295, + "learning_rate": 3.029435260925288e-05, + "loss": 0.893, + "step": 6250 + }, + { + "epoch": 1.358675725223768, + "grad_norm": 0.26936379075050354, + "learning_rate": 3.000956232123856e-05, + "loss": 0.9033, + "step": 6300 + }, + { + "epoch": 1.369459721772889, + "grad_norm": 0.253090500831604, + "learning_rate": 2.972409311480357e-05, + "loss": 0.8867, + "step": 6350 + }, + { + "epoch": 1.3802437183220102, + "grad_norm": 0.2847846746444702, + "learning_rate": 2.94379836780189e-05, + "loss": 0.8721, + "step": 6400 + }, + { + "epoch": 1.3910277148711312, + "grad_norm": 0.26056525111198425, + "learning_rate": 2.9151272785722466e-05, + "loss": 0.8913, + "step": 6450 + }, + { + "epoch": 1.4018117114202524, + "grad_norm": 0.23132337629795074, + "learning_rate": 2.8863999294264122e-05, + "loss": 0.9058, + "step": 6500 + }, + { + "epoch": 1.4125957079693734, + "grad_norm": 0.2190658152103424, + "learning_rate": 2.8576202136239688e-05, + "loss": 0.8906, + "step": 6550 + }, + { + "epoch": 1.4233797045184946, + "grad_norm": 0.26291966438293457, + "learning_rate": 2.8287920315214643e-05, + "loss": 0.9229, + "step": 6600 + }, + { + "epoch": 1.4341637010676156, + "grad_norm": 0.23218290507793427, + "learning_rate": 2.799919290043818e-05, + "loss": 0.9242, + "step": 6650 + }, + { + "epoch": 1.4449476976167368, + "grad_norm": 0.2565305233001709, + "learning_rate": 2.7710059021548344e-05, + "loss": 0.883, + "step": 6700 + }, + { + "epoch": 1.4557316941658578, + "grad_norm": 0.2470102459192276, + "learning_rate": 2.7420557863269043e-05, + "loss": 0.8949, + "step": 6750 + }, + { + "epoch": 1.466515690714979, + "grad_norm": 0.25169292092323303, + "learning_rate": 2.713072866009953e-05, + "loss": 0.9122, + "step": 6800 + }, + { + "epoch": 1.4772996872641002, + "grad_norm": 0.23668742179870605, + "learning_rate": 2.6840610690997182e-05, + "loss": 0.8919, + "step": 6850 + }, + { + "epoch": 1.4880836838132212, + "grad_norm": 0.2786126732826233, + "learning_rate": 2.655024327405422e-05, + "loss": 0.8883, + "step": 6900 + }, + { + "epoch": 1.4988676803623422, + "grad_norm": 0.25976258516311646, + "learning_rate": 2.6259665761169183e-05, + "loss": 0.9291, + "step": 6950 + }, + { + "epoch": 1.5096516769114634, + "grad_norm": 0.2566768229007721, + "learning_rate": 2.5968917532713743e-05, + "loss": 0.901, + "step": 7000 + }, + { + "epoch": 1.5204356734605846, + "grad_norm": 0.24728557467460632, + "learning_rate": 2.5678037992195714e-05, + "loss": 0.8811, + "step": 7050 + }, + { + "epoch": 1.5312196700097056, + "grad_norm": 0.24409767985343933, + "learning_rate": 2.5387066560918906e-05, + "loss": 0.904, + "step": 7100 + }, + { + "epoch": 1.5420036665588266, + "grad_norm": 0.2483212798833847, + "learning_rate": 2.5096042672640596e-05, + "loss": 0.8945, + "step": 7150 + }, + { + "epoch": 1.5527876631079478, + "grad_norm": 0.23452620208263397, + "learning_rate": 2.4805005768227252e-05, + "loss": 0.9063, + "step": 7200 + }, + { + "epoch": 1.563571659657069, + "grad_norm": 0.22194162011146545, + "learning_rate": 2.4513995290309358e-05, + "loss": 0.8834, + "step": 7250 + }, + { + "epoch": 1.57435565620619, + "grad_norm": 0.25706538558006287, + "learning_rate": 2.4223050677935947e-05, + "loss": 0.9149, + "step": 7300 + }, + { + "epoch": 1.585139652755311, + "grad_norm": 0.2703045606613159, + "learning_rate": 2.3932211361229683e-05, + "loss": 0.9059, + "step": 7350 + }, + { + "epoch": 1.5959236493044322, + "grad_norm": 0.26212379336357117, + "learning_rate": 2.3641516756043053e-05, + "loss": 0.8996, + "step": 7400 + }, + { + "epoch": 1.6067076458535534, + "grad_norm": 0.241121307015419, + "learning_rate": 2.3351006258616618e-05, + "loss": 0.8934, + "step": 7450 + }, + { + "epoch": 1.6174916424026744, + "grad_norm": 0.2937757968902588, + "learning_rate": 2.3060719240239807e-05, + "loss": 0.8907, + "step": 7500 + }, + { + "epoch": 1.6282756389517954, + "grad_norm": 0.2826499938964844, + "learning_rate": 2.2770695041915187e-05, + "loss": 0.8963, + "step": 7550 + }, + { + "epoch": 1.6390596355009166, + "grad_norm": 0.2622433602809906, + "learning_rate": 2.248097296902672e-05, + "loss": 0.8797, + "step": 7600 + }, + { + "epoch": 1.6498436320500378, + "grad_norm": 0.26400211453437805, + "learning_rate": 2.2191592286013042e-05, + "loss": 0.9084, + "step": 7650 + }, + { + "epoch": 1.6606276285991588, + "grad_norm": 0.25721365213394165, + "learning_rate": 2.1902592211046032e-05, + "loss": 0.882, + "step": 7700 + }, + { + "epoch": 1.6714116251482798, + "grad_norm": 0.25235188007354736, + "learning_rate": 2.1614011910715896e-05, + "loss": 0.9306, + "step": 7750 + }, + { + "epoch": 1.6821956216974012, + "grad_norm": 0.2521611154079437, + "learning_rate": 2.1325890494723065e-05, + "loss": 0.8911, + "step": 7800 + }, + { + "epoch": 1.6929796182465222, + "grad_norm": 0.2881399691104889, + "learning_rate": 2.103826701057793e-05, + "loss": 0.8837, + "step": 7850 + }, + { + "epoch": 1.7037636147956432, + "grad_norm": 0.2743209898471832, + "learning_rate": 2.075118043830888e-05, + "loss": 0.9072, + "step": 7900 + }, + { + "epoch": 1.7145476113447644, + "grad_norm": 0.2475823312997818, + "learning_rate": 2.046466968517963e-05, + "loss": 0.9109, + "step": 7950 + }, + { + "epoch": 1.7253316078938856, + "grad_norm": 0.28786906599998474, + "learning_rate": 2.0178773580416263e-05, + "loss": 0.9085, + "step": 8000 + }, + { + "epoch": 1.7361156044430066, + "grad_norm": 0.2793081998825073, + "learning_rate": 1.9893530869944986e-05, + "loss": 0.8721, + "step": 8050 + }, + { + "epoch": 1.7468996009921276, + "grad_norm": 0.26357826590538025, + "learning_rate": 1.9608980211141028e-05, + "loss": 0.9014, + "step": 8100 + }, + { + "epoch": 1.7576835975412488, + "grad_norm": 0.26504483819007874, + "learning_rate": 1.93251601675897e-05, + "loss": 0.9091, + "step": 8150 + }, + { + "epoch": 1.76846759409037, + "grad_norm": 0.26386550068855286, + "learning_rate": 1.9042109203860027e-05, + "loss": 0.8985, + "step": 8200 + }, + { + "epoch": 1.779251590639491, + "grad_norm": 0.2590016722679138, + "learning_rate": 1.87598656802919e-05, + "loss": 0.8865, + "step": 8250 + }, + { + "epoch": 1.790035587188612, + "grad_norm": 0.2528024911880493, + "learning_rate": 1.8478467847797238e-05, + "loss": 0.9046, + "step": 8300 + }, + { + "epoch": 1.8008195837377332, + "grad_norm": 0.27202916145324707, + "learning_rate": 1.8197953842676168e-05, + "loss": 0.9021, + "step": 8350 + }, + { + "epoch": 1.8116035802868544, + "grad_norm": 0.240274578332901, + "learning_rate": 1.7918361681448504e-05, + "loss": 0.8921, + "step": 8400 + }, + { + "epoch": 1.8223875768359754, + "grad_norm": 0.29021942615509033, + "learning_rate": 1.7639729255701655e-05, + "loss": 0.9074, + "step": 8450 + }, + { + "epoch": 1.8331715733850964, + "grad_norm": 0.28871750831604004, + "learning_rate": 1.7362094326955336e-05, + "loss": 0.8962, + "step": 8500 + }, + { + "epoch": 1.8439555699342176, + "grad_norm": 0.2800693213939667, + "learning_rate": 1.7085494521544025e-05, + "loss": 0.9222, + "step": 8550 + }, + { + "epoch": 1.8547395664833388, + "grad_norm": 0.2543833255767822, + "learning_rate": 1.6809967325517573e-05, + "loss": 0.8925, + "step": 8600 + }, + { + "epoch": 1.8655235630324598, + "grad_norm": 0.255051851272583, + "learning_rate": 1.6535550079561027e-05, + "loss": 0.8818, + "step": 8650 + }, + { + "epoch": 1.8763075595815808, + "grad_norm": 0.289727121591568, + "learning_rate": 1.6262279973933984e-05, + "loss": 0.8878, + "step": 8700 + }, + { + "epoch": 1.887091556130702, + "grad_norm": 0.2506343424320221, + "learning_rate": 1.5990194043430444e-05, + "loss": 0.8961, + "step": 8750 + }, + { + "epoch": 1.8978755526798232, + "grad_norm": 0.3042599558830261, + "learning_rate": 1.5719329162359638e-05, + "loss": 0.9082, + "step": 8800 + }, + { + "epoch": 1.9086595492289442, + "grad_norm": 0.2791798710823059, + "learning_rate": 1.5449722039548706e-05, + "loss": 0.9023, + "step": 8850 + }, + { + "epoch": 1.9194435457780652, + "grad_norm": 0.2678021788597107, + "learning_rate": 1.5181409213367726e-05, + "loss": 0.8826, + "step": 8900 + }, + { + "epoch": 1.9302275423271864, + "grad_norm": 0.2640957832336426, + "learning_rate": 1.4914427046777879e-05, + "loss": 0.887, + "step": 8950 + }, + { + "epoch": 1.9410115388763076, + "grad_norm": 0.2847963869571686, + "learning_rate": 1.4648811722403358e-05, + "loss": 0.8906, + "step": 9000 + }, + { + "epoch": 1.9517955354254286, + "grad_norm": 0.2558712661266327, + "learning_rate": 1.4384599237627777e-05, + "loss": 0.9006, + "step": 9050 + }, + { + "epoch": 1.9625795319745498, + "grad_norm": 0.26001158356666565, + "learning_rate": 1.4121825399715577e-05, + "loss": 0.902, + "step": 9100 + }, + { + "epoch": 1.973363528523671, + "grad_norm": 0.250234991312027, + "learning_rate": 1.3860525820959358e-05, + "loss": 0.8966, + "step": 9150 + }, + { + "epoch": 1.984147525072792, + "grad_norm": 0.2639175355434418, + "learning_rate": 1.360073591385342e-05, + "loss": 0.9063, + "step": 9200 + }, + { + "epoch": 1.994931521621913, + "grad_norm": 0.2366214245557785, + "learning_rate": 1.334249088629464e-05, + "loss": 0.8907, + "step": 9250 + }, + { + "epoch": 2.0056076782055428, + "grad_norm": 0.291415274143219, + "learning_rate": 1.3085825736810828e-05, + "loss": 0.8729, + "step": 9300 + }, + { + "epoch": 2.016391674754664, + "grad_norm": 0.2706186771392822, + "learning_rate": 1.2830775249817595e-05, + "loss": 0.8663, + "step": 9350 + }, + { + "epoch": 2.027175671303785, + "grad_norm": 0.2555548846721649, + "learning_rate": 1.2577373990904279e-05, + "loss": 0.8663, + "step": 9400 + }, + { + "epoch": 2.037959667852906, + "grad_norm": 0.254191517829895, + "learning_rate": 1.2325656302149374e-05, + "loss": 0.8592, + "step": 9450 + }, + { + "epoch": 2.0487436644020276, + "grad_norm": 0.30470383167266846, + "learning_rate": 1.2075656297466382e-05, + "loss": 0.8938, + "step": 9500 + } + ], + "logging_steps": 50, + "max_steps": 13911, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.2518119593900245e+19, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/sft/checkpoint-9500/training_args.bin b/sft/checkpoint-9500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0d400dd58a69fb3f7fcfc837e7f6d5b15369eb7 --- /dev/null +++ b/sft/checkpoint-9500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1433bfa2d239cb7b9cc930c5807573699adcbd8041b466ac57ad78593ea77 +size 5304 diff --git a/sft/log/events.out.tfevents.1758097194.skyocean b/sft/log/events.out.tfevents.1758097194.skyocean new file mode 100644 index 0000000000000000000000000000000000000000..4a34dd2aa7ef316cbb496bc7075de616443a44ea --- /dev/null +++ b/sft/log/events.out.tfevents.1758097194.skyocean @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a3d453fd236848a32b6323ee767ff51b3a7141532500c77d980fb3c5af523dc +size 4144 diff --git a/sft/log/events.out.tfevents.1758097628.skyocean b/sft/log/events.out.tfevents.1758097628.skyocean new file mode 100644 index 0000000000000000000000000000000000000000..e68bb2d83cabdefa439016defac0c8e0ceca9a09 --- /dev/null +++ b/sft/log/events.out.tfevents.1758097628.skyocean @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616a16913ec1fccc291119210e4ba6aadfba89e9b8543af0b70add85d1896ac3 +size 4917 diff --git a/sft/log/events.out.tfevents.1758098023.skyocean b/sft/log/events.out.tfevents.1758098023.skyocean new file mode 100644 index 0000000000000000000000000000000000000000..39718fb1cb6d8b02e5ace80690dbdabe473f1a0a --- /dev/null +++ b/sft/log/events.out.tfevents.1758098023.skyocean @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8607ca337a47b1d79e1bb16aa66c3a2d5c048f679aa24199745dac4bcc48c6 +size 4917 diff --git a/sft/log/events.out.tfevents.1758098516.skyocean b/sft/log/events.out.tfevents.1758098516.skyocean new file mode 100644 index 0000000000000000000000000000000000000000..1a56d939190545cb69cd2e35c6f8fdbb2610facc --- /dev/null +++ b/sft/log/events.out.tfevents.1758098516.skyocean @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4b7b671d456c4a4381864c46aa2f4f5c55cbabfad1e7cc33f0d757e18eb822 +size 4916 diff --git a/sft/log/events.out.tfevents.1758098944.skyocean b/sft/log/events.out.tfevents.1758098944.skyocean new file mode 100644 index 0000000000000000000000000000000000000000..9767fd32e6b2d2b1c1ed56eaf0b1753d1d0487a8 --- /dev/null +++ b/sft/log/events.out.tfevents.1758098944.skyocean @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72fe1a26fa68a5eaaee94745ba35026a6e790a2e71f02cca382c19f89c090492 +size 63921 diff --git a/sft/special_tokens_map.json b/sft/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..04829afa78a2d2df203ac846968db37269b01f7f --- /dev/null +++ b/sft/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|end_of_text|>" +} diff --git a/sft/tokenizer.json b/sft/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/sft/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/sft/tokenizer_config.json b/sft/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1cb53db01ee34df7e45f212e93089a64707531b --- /dev/null +++ b/sft/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +}