diff --git a/.gitattributes b/.gitattributes index f636faf76033765dedba4428b41a7729e1160312..b72ea28732a4a64a6d06849cabd08ca67c807009 100644 --- a/.gitattributes +++ b/.gitattributes @@ -47,3 +47,9 @@ Qwen2.5-7B-Instruct-lora-2/checkpoint-3000/tokenizer.json filter=lfs diff=lfs me Qwen2.5-7B-Instruct-lora-2/checkpoint-3036/tokenizer.json filter=lfs diff=lfs merge=lfs -text Qwen2.5-7B-Instruct-lora-2/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text Qwen2.5-7B-Instruct-lora-2/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Llama-3.1-8B-Instruct-lora-2/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Llama-3.1-8B-Instruct-lora-2/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/Llama-3.1-8B-Instruct-lora-2/README.md b/Llama-3.1-8B-Instruct-lora-2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0a823a1f53fd8bcaf7b46a2b62123009089d70fb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/README.md @@ -0,0 +1,59 @@ +--- +library_name: peft +license: other +base_model: /data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct +tags: +- llama-factory +- lora +- generated_from_trainer +model-index: +- name: Llama-3.1-8B-Instruct-lora-2 + results: [] +--- + + + +# Llama-3.1-8B-Instruct-lora-2 + +This model is a fine-tuned version of [/data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct](https://huggingface.co//data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct) on the nlpcc25_task1_train dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 4 +- eval_batch_size: 4 +- seed: 42 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 32 +- optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 2.0 + +### Training results + + + +### Framework versions + +- PEFT 0.15.0 +- Transformers 4.50.0 +- Pytorch 2.6.0+cu124 +- Datasets 3.4.1 +- Tokenizers 0.21.0 \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/adapter_config.json b/Llama-3.1-8B-Instruct-lora-2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5914f072a289e8f8e163cee6ef6f15c727e47c85 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "q_proj", + "up_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/adapter_model.safetensors b/Llama-3.1-8B-Instruct-lora-2/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c970d7d886dd3a1b4a970acfebfbda006db52b22 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a750af00b88a99412be30c2e37965edd99d811ae8421a68761b0efe29760c252 +size 83945296 diff --git a/Llama-3.1-8B-Instruct-lora-2/all_results.json b/Llama-3.1-8B-Instruct-lora-2/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7011423b2cd58522c1535d9df226620c9fabdd7e --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 1.9985185185185186, + "eval_nlpcc25_task1_dev_accuracy": 0.9964935064935064, + "eval_nlpcc25_task1_dev_loss": 0.021377403289079666, + "eval_nlpcc25_task1_dev_runtime": 3451.6717, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "total_flos": 1.6877866202879754e+18, + "train_loss": 0.023970503359357204, + "train_runtime": 210819.9785, + "train_samples_per_second": 0.307, + "train_steps_per_second": 0.01 +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/README.md b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2b3b3f81cc13b36bc8025f54bbde88be77c46fd5 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/README.md @@ -0,0 +1,202 @@ +--- +base_model: /data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/adapter_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5914f072a289e8f8e163cee6ef6f15c727e47c85 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "q_proj", + "up_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/adapter_model.safetensors b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..86e8166b40073286226ac5e54c542c12b0a48689 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e1fd038e2484f3a07b20a30dec10c90ad03562c52742ee923b13ce5c06b02e1 +size 83945296 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/optimizer.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..95f0a8fb5838e0d7a81852f5ff12d9b5d08b12bb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c6424eb791cfdaaafb8c2a16d88c9cc38cf31c9d9e36547c02579739bd3b6f3 +size 168149074 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/rng_state.pth b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..323d589b0258d085c7a07b0c4f97b26796e865c5 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d +size 14244 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/scheduler.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..901cf0b76b47c8561f427b013e345a50ce5ed040 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f328aba53bb455f69b35f2b61b6f5ecd50dbe85618534e9d48f1d7ddf91601 +size 1064 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/special_tokens_map.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..df5c3a478b842fa66e6a8c10265478284c1d4f41 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/special_tokens_map.json @@ -0,0 +1,33 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/tokenizer.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/tokenizer_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60ca9fbea85debf5fb15193ad8ef61d682f121c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/trainer_state.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a2906adbd714359b18a14a7f4352aa0f357ae6ed --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/trainer_state.json @@ -0,0 +1,752 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9876543209876543, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009876543209876543, + "grad_norm": 8.173213958740234, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.3879, + "step": 10 + }, + { + "epoch": 0.019753086419753086, + "grad_norm": 7.614120960235596, + "learning_rate": 4.926108374384237e-06, + "loss": 1.3407, + "step": 20 + }, + { + "epoch": 0.02962962962962963, + "grad_norm": 6.4321064949035645, + "learning_rate": 7.3891625615763555e-06, + "loss": 1.1236, + "step": 30 + }, + { + "epoch": 0.03950617283950617, + "grad_norm": 2.8025548458099365, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5763, + "step": 40 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 0.7712829113006592, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.1632, + "step": 50 + }, + { + "epoch": 0.05925925925925926, + "grad_norm": 0.35205233097076416, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.0571, + "step": 60 + }, + { + "epoch": 0.0691358024691358, + "grad_norm": 0.9003845453262329, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.0374, + "step": 70 + }, + { + "epoch": 0.07901234567901234, + "grad_norm": 1.4704982042312622, + "learning_rate": 1.970443349753695e-05, + "loss": 0.0226, + "step": 80 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 2.574737310409546, + "learning_rate": 2.2167487684729066e-05, + "loss": 0.0129, + "step": 90 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 5.429731369018555, + "learning_rate": 2.4630541871921184e-05, + "loss": 0.0239, + "step": 100 + }, + { + "epoch": 0.10864197530864197, + "grad_norm": 0.17149314284324646, + "learning_rate": 2.70935960591133e-05, + "loss": 0.0102, + "step": 110 + }, + { + "epoch": 0.11851851851851852, + "grad_norm": 0.3575906753540039, + "learning_rate": 2.9556650246305422e-05, + "loss": 0.0089, + "step": 120 + }, + { + "epoch": 0.12839506172839507, + "grad_norm": 0.30634525418281555, + "learning_rate": 3.2019704433497536e-05, + "loss": 0.0028, + "step": 130 + }, + { + "epoch": 0.1382716049382716, + "grad_norm": 0.006271605845540762, + "learning_rate": 3.4482758620689657e-05, + "loss": 0.0006, + "step": 140 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.01288707833737135, + "learning_rate": 3.694581280788178e-05, + "loss": 0.0008, + "step": 150 + }, + { + "epoch": 0.1580246913580247, + "grad_norm": 0.004254512023180723, + "learning_rate": 3.94088669950739e-05, + "loss": 0.0008, + "step": 160 + }, + { + "epoch": 0.16790123456790124, + "grad_norm": 0.017511000856757164, + "learning_rate": 4.187192118226601e-05, + "loss": 0.0051, + "step": 170 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.001938911504112184, + "learning_rate": 4.433497536945813e-05, + "loss": 0.0002, + "step": 180 + }, + { + "epoch": 0.18765432098765433, + "grad_norm": 0.5761703848838806, + "learning_rate": 4.679802955665025e-05, + "loss": 0.0005, + "step": 190 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 0.0074734254740178585, + "learning_rate": 4.926108374384237e-05, + "loss": 0.0003, + "step": 200 + }, + { + "epoch": 0.2074074074074074, + "grad_norm": 0.0016212465707212687, + "learning_rate": 4.9998177025185267e-05, + "loss": 0.0011, + "step": 210 + }, + { + "epoch": 0.21728395061728395, + "grad_norm": 0.0033307652920484543, + "learning_rate": 4.998924880895427e-05, + "loss": 0.0009, + "step": 220 + }, + { + "epoch": 0.2271604938271605, + "grad_norm": 0.009896264411509037, + "learning_rate": 4.997288317313464e-05, + "loss": 0.001, + "step": 230 + }, + { + "epoch": 0.23703703703703705, + "grad_norm": 0.00731613440439105, + "learning_rate": 4.994908498854508e-05, + "loss": 0.0021, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 0.01958666928112507, + "learning_rate": 4.991786133811494e-05, + "loss": 0.0024, + "step": 250 + }, + { + "epoch": 0.25679012345679014, + "grad_norm": 0.008584077470004559, + "learning_rate": 4.9879221514776196e-05, + "loss": 0.0002, + "step": 260 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.0017416627379134297, + "learning_rate": 4.983317701869765e-05, + "loss": 0.0001, + "step": 270 + }, + { + "epoch": 0.2765432098765432, + "grad_norm": 0.8311202526092529, + "learning_rate": 4.977974155386214e-05, + "loss": 0.0014, + "step": 280 + }, + { + "epoch": 0.28641975308641976, + "grad_norm": 0.06173387169837952, + "learning_rate": 4.9718931023987926e-05, + "loss": 0.0043, + "step": 290 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.011783963069319725, + "learning_rate": 4.9650763527795385e-05, + "loss": 0.0032, + "step": 300 + }, + { + "epoch": 0.30617283950617286, + "grad_norm": 0.0035617423709481955, + "learning_rate": 4.9575259353620305e-05, + "loss": 0.0067, + "step": 310 + }, + { + "epoch": 0.3160493827160494, + "grad_norm": 0.007925956510007381, + "learning_rate": 4.949244097337567e-05, + "loss": 0.002, + "step": 320 + }, + { + "epoch": 0.32592592592592595, + "grad_norm": 0.00402588676661253, + "learning_rate": 4.9402333035863344e-05, + "loss": 0.0005, + "step": 330 + }, + { + "epoch": 0.3358024691358025, + "grad_norm": 0.00444524921476841, + "learning_rate": 4.930496235943811e-05, + "loss": 0.0001, + "step": 340 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 0.004133024252951145, + "learning_rate": 4.9200357924025755e-05, + "loss": 0.001, + "step": 350 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.0026975509244948626, + "learning_rate": 4.9088550862497966e-05, + "loss": 0.0042, + "step": 360 + }, + { + "epoch": 0.3654320987654321, + "grad_norm": 0.0009604657534509897, + "learning_rate": 4.8969574451406445e-05, + "loss": 0.0004, + "step": 370 + }, + { + "epoch": 0.37530864197530867, + "grad_norm": 0.0007526307599619031, + "learning_rate": 4.8843464101078924e-05, + "loss": 0.0005, + "step": 380 + }, + { + "epoch": 0.3851851851851852, + "grad_norm": 0.034833673387765884, + "learning_rate": 4.871025734508022e-05, + "loss": 0.0002, + "step": 390 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 0.0006839093985036016, + "learning_rate": 4.85699938290413e-05, + "loss": 0.0011, + "step": 400 + }, + { + "epoch": 0.4049382716049383, + "grad_norm": 0.17423595488071442, + "learning_rate": 4.842271529885978e-05, + "loss": 0.0002, + "step": 410 + }, + { + "epoch": 0.4148148148148148, + "grad_norm": 0.0021370204631239176, + "learning_rate": 4.8268465588275235e-05, + "loss": 0.0021, + "step": 420 + }, + { + "epoch": 0.4246913580246914, + "grad_norm": 0.002740511205047369, + "learning_rate": 4.8107290605823306e-05, + "loss": 0.0007, + "step": 430 + }, + { + "epoch": 0.4345679012345679, + "grad_norm": 0.5583088397979736, + "learning_rate": 4.79392383211721e-05, + "loss": 0.001, + "step": 440 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.0016133144963532686, + "learning_rate": 4.776435875084526e-05, + "loss": 0.0001, + "step": 450 + }, + { + "epoch": 0.454320987654321, + "grad_norm": 0.008442943915724754, + "learning_rate": 4.7582703943335785e-05, + "loss": 0.0001, + "step": 460 + }, + { + "epoch": 0.4641975308641975, + "grad_norm": 0.0007343398174270988, + "learning_rate": 4.739432796361515e-05, + "loss": 0.0003, + "step": 470 + }, + { + "epoch": 0.4740740740740741, + "grad_norm": 0.0006774268113076687, + "learning_rate": 4.719928687704218e-05, + "loss": 0.0, + "step": 480 + }, + { + "epoch": 0.4839506172839506, + "grad_norm": 0.0005065005389042199, + "learning_rate": 4.699763873267667e-05, + "loss": 0.0, + "step": 490 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 0.0007372256950475276, + "learning_rate": 4.678944354600249e-05, + "loss": 0.0002, + "step": 500 + }, + { + "epoch": 0.49382716049382713, + "eval_nlpcc25_task1_dev_accuracy": 0.9935714285714285, + "eval_nlpcc25_task1_dev_loss": 0.038043826818466187, + "eval_nlpcc25_task1_dev_runtime": 3452.1571, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 500 + }, + { + "epoch": 0.5037037037037037, + "grad_norm": 0.00031303250580094755, + "learning_rate": 4.65747632810655e-05, + "loss": 0.0002, + "step": 510 + }, + { + "epoch": 0.5135802469135803, + "grad_norm": 0.0012328416341915727, + "learning_rate": 4.635366183203157e-05, + "loss": 0.0101, + "step": 520 + }, + { + "epoch": 0.5234567901234568, + "grad_norm": 0.0003338649112265557, + "learning_rate": 4.612620500417001e-05, + "loss": 0.0, + "step": 530 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.00036818900844082236, + "learning_rate": 4.589246049426835e-05, + "loss": 0.0, + "step": 540 + }, + { + "epoch": 0.5432098765432098, + "grad_norm": 0.0008916526567190886, + "learning_rate": 4.565249787048408e-05, + "loss": 0.0005, + "step": 550 + }, + { + "epoch": 0.5530864197530864, + "grad_norm": 0.001147187897004187, + "learning_rate": 4.5406388551639436e-05, + "loss": 0.0, + "step": 560 + }, + { + "epoch": 0.562962962962963, + "grad_norm": 0.00036947213811799884, + "learning_rate": 4.515420578596542e-05, + "loss": 0.0032, + "step": 570 + }, + { + "epoch": 0.5728395061728395, + "grad_norm": 0.0018035719403997064, + "learning_rate": 4.489602462930126e-05, + "loss": 0.0, + "step": 580 + }, + { + "epoch": 0.582716049382716, + "grad_norm": 0.0003090534301009029, + "learning_rate": 4.4631921922755985e-05, + "loss": 0.0, + "step": 590 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.0003804001025855541, + "learning_rate": 4.436197626983855e-05, + "loss": 0.0, + "step": 600 + }, + { + "epoch": 0.6024691358024692, + "grad_norm": 0.0002678770397324115, + "learning_rate": 4.4086268013063556e-05, + "loss": 0.0, + "step": 610 + }, + { + "epoch": 0.6123456790123457, + "grad_norm": 0.0003251763992011547, + "learning_rate": 4.3804879210039275e-05, + "loss": 0.0, + "step": 620 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 0.00025686624576337636, + "learning_rate": 4.351789360904527e-05, + "loss": 0.0, + "step": 630 + }, + { + "epoch": 0.6320987654320988, + "grad_norm": 0.00028061174089089036, + "learning_rate": 4.322539662410687e-05, + "loss": 0.0, + "step": 640 + }, + { + "epoch": 0.6419753086419753, + "grad_norm": 0.0002689982939045876, + "learning_rate": 4.29274753095738e-05, + "loss": 0.0005, + "step": 650 + }, + { + "epoch": 0.6518518518518519, + "grad_norm": 0.0013502618530765176, + "learning_rate": 4.262421833421069e-05, + "loss": 0.0016, + "step": 660 + }, + { + "epoch": 0.6617283950617284, + "grad_norm": 0.0007630003965459764, + "learning_rate": 4.2315715954807e-05, + "loss": 0.0007, + "step": 670 + }, + { + "epoch": 0.671604938271605, + "grad_norm": 0.0010010383557528257, + "learning_rate": 4.200205998931442e-05, + "loss": 0.0, + "step": 680 + }, + { + "epoch": 0.6814814814814815, + "grad_norm": 0.0028828333597630262, + "learning_rate": 4.1683343789519544e-05, + "loss": 0.0, + "step": 690 + }, + { + "epoch": 0.691358024691358, + "grad_norm": 0.0005191878299228847, + "learning_rate": 4.135966221326007e-05, + "loss": 0.0002, + "step": 700 + }, + { + "epoch": 0.7012345679012346, + "grad_norm": 0.0010550885926932096, + "learning_rate": 4.103111159619274e-05, + "loss": 0.0, + "step": 710 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 0.00028645008569583297, + "learning_rate": 4.0697789723121485e-05, + "loss": 0.0024, + "step": 720 + }, + { + "epoch": 0.7209876543209877, + "grad_norm": 0.00031866817153058946, + "learning_rate": 4.035979579889424e-05, + "loss": 0.0, + "step": 730 + }, + { + "epoch": 0.7308641975308642, + "grad_norm": 0.0003414931707084179, + "learning_rate": 4.001723041887713e-05, + "loss": 0.0, + "step": 740 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.00028110420680604875, + "learning_rate": 3.967019553901477e-05, + "loss": 0.0, + "step": 750 + }, + { + "epoch": 0.7506172839506173, + "grad_norm": 0.0002813187020365149, + "learning_rate": 3.931879444548568e-05, + "loss": 0.0, + "step": 760 + }, + { + "epoch": 0.7604938271604939, + "grad_norm": 0.00022827104839961976, + "learning_rate": 3.8963131723961734e-05, + "loss": 0.0, + "step": 770 + }, + { + "epoch": 0.7703703703703704, + "grad_norm": 0.00034623872488737106, + "learning_rate": 3.860331322848091e-05, + "loss": 0.0, + "step": 780 + }, + { + "epoch": 0.7802469135802469, + "grad_norm": 0.000275197351584211, + "learning_rate": 3.823944604994243e-05, + "loss": 0.0, + "step": 790 + }, + { + "epoch": 0.7901234567901234, + "grad_norm": 0.0003900310257449746, + "learning_rate": 3.7871638484233966e-05, + "loss": 0.0016, + "step": 800 + }, + { + "epoch": 0.8, + "grad_norm": 0.005876209121197462, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0021, + "step": 810 + }, + { + "epoch": 0.8098765432098766, + "grad_norm": 0.10590516775846481, + "learning_rate": 3.71246412060613e-05, + "loss": 0.0004, + "step": 820 + }, + { + "epoch": 0.8197530864197531, + "grad_norm": 0.010082660242915154, + "learning_rate": 3.674567381849498e-05, + "loss": 0.0001, + "step": 830 + }, + { + "epoch": 0.8296296296296296, + "grad_norm": 0.0013996075140312314, + "learning_rate": 3.6363210627385004e-05, + "loss": 0.0001, + "step": 840 + }, + { + "epoch": 0.8395061728395061, + "grad_norm": 0.0020888964645564556, + "learning_rate": 3.59773654632531e-05, + "loss": 0.0001, + "step": 850 + }, + { + "epoch": 0.8493827160493828, + "grad_norm": 0.0007959024515002966, + "learning_rate": 3.558825316317998e-05, + "loss": 0.0, + "step": 860 + }, + { + "epoch": 0.8592592592592593, + "grad_norm": 0.0046511865220963955, + "learning_rate": 3.5195989536626925e-05, + "loss": 0.0, + "step": 870 + }, + { + "epoch": 0.8691358024691358, + "grad_norm": 0.3736809194087982, + "learning_rate": 3.4800691330968064e-05, + "loss": 0.0057, + "step": 880 + }, + { + "epoch": 0.8790123456790123, + "grad_norm": 0.003749604569748044, + "learning_rate": 3.440247619674347e-05, + "loss": 0.0, + "step": 890 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.0007721478468738496, + "learning_rate": 3.400146265264341e-05, + "loss": 0.0002, + "step": 900 + }, + { + "epoch": 0.8987654320987655, + "grad_norm": 0.000859771971590817, + "learning_rate": 3.359777005023428e-05, + "loss": 0.0, + "step": 910 + }, + { + "epoch": 0.908641975308642, + "grad_norm": 0.0005010567838326097, + "learning_rate": 3.3191518538436596e-05, + "loss": 0.0, + "step": 920 + }, + { + "epoch": 0.9185185185185185, + "grad_norm": 0.00021397744421847165, + "learning_rate": 3.278282902776569e-05, + "loss": 0.0, + "step": 930 + }, + { + "epoch": 0.928395061728395, + "grad_norm": 0.0004123291582800448, + "learning_rate": 3.237182315434582e-05, + "loss": 0.0001, + "step": 940 + }, + { + "epoch": 0.9382716049382716, + "grad_norm": 0.0003536223666742444, + "learning_rate": 3.195862324370812e-05, + "loss": 0.0001, + "step": 950 + }, + { + "epoch": 0.9481481481481482, + "grad_norm": 0.0007978660287335515, + "learning_rate": 3.154335227438362e-05, + "loss": 0.0, + "step": 960 + }, + { + "epoch": 0.9580246913580247, + "grad_norm": 0.0002476648660376668, + "learning_rate": 3.112613384130168e-05, + "loss": 0.0, + "step": 970 + }, + { + "epoch": 0.9679012345679012, + "grad_norm": 0.001542024314403534, + "learning_rate": 3.0707092119005155e-05, + "loss": 0.0, + "step": 980 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 0.0001304072793573141, + "learning_rate": 3.028635182469294e-05, + "loss": 0.0, + "step": 990 + }, + { + "epoch": 0.9876543209876543, + "grad_norm": 0.0019521948415786028, + "learning_rate": 2.9864038181101046e-05, + "loss": 0.0, + "step": 1000 + }, + { + "epoch": 0.9876543209876543, + "eval_nlpcc25_task1_dev_accuracy": 0.9957792207792208, + "eval_nlpcc25_task1_dev_loss": 0.025204554200172424, + "eval_nlpcc25_task1_dev_runtime": 3451.9671, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1000 + } + ], + "logging_steps": 10, + "max_steps": 2024, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.347300197692867e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/training_args.bin b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dca977cd6736713fb5f4fb68d38053f8e5f452eb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95434493472159610d4c2dd6dc378ebbfd2ed878eb7c793a26ac8ce723b446c0 +size 5752 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/README.md b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2b3b3f81cc13b36bc8025f54bbde88be77c46fd5 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/README.md @@ -0,0 +1,202 @@ +--- +base_model: /data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/adapter_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5914f072a289e8f8e163cee6ef6f15c727e47c85 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "q_proj", + "up_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/adapter_model.safetensors b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c75429ac099e0fd8dd585a11b9e266775f96fda6 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1afc48bf84ac9bd42ad55963d42b8ccb703443ece3c2a6346fa671f2f1cd6b74 +size 83945296 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/optimizer.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a40531f920776086898e6e68f82f9b9b364ec7a --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94fb6bcd61d44ecf8724eb3dc2ee3ccfb6717294498b83899c41e545c97203ec +size 168149074 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/rng_state.pth b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..06c25b71551e4830f82f7687d2345c7fbd987daa --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3ee827a7a00012c0a116546df467feee35e70376d81a7a85b1a70eb90414d3 +size 14244 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/scheduler.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5ebcca7b39793f0af508b1d4877eac1b5931ced --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90baf8c5136f17df7ae98d5f074d7aed3821567bf7809d3538c0d7513645071c +size 1064 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/special_tokens_map.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..df5c3a478b842fa66e6a8c10265478284c1d4f41 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/special_tokens_map.json @@ -0,0 +1,33 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/tokenizer.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/tokenizer_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60ca9fbea85debf5fb15193ad8ef61d682f121c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/trainer_state.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5df293c76c7584374f3c2d2c7f753a07486224fa --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/trainer_state.json @@ -0,0 +1,1111 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4809876543209877, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009876543209876543, + "grad_norm": 8.173213958740234, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.3879, + "step": 10 + }, + { + "epoch": 0.019753086419753086, + "grad_norm": 7.614120960235596, + "learning_rate": 4.926108374384237e-06, + "loss": 1.3407, + "step": 20 + }, + { + "epoch": 0.02962962962962963, + "grad_norm": 6.4321064949035645, + "learning_rate": 7.3891625615763555e-06, + "loss": 1.1236, + "step": 30 + }, + { + "epoch": 0.03950617283950617, + "grad_norm": 2.8025548458099365, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5763, + "step": 40 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 0.7712829113006592, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.1632, + "step": 50 + }, + { + "epoch": 0.05925925925925926, + "grad_norm": 0.35205233097076416, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.0571, + "step": 60 + }, + { + "epoch": 0.0691358024691358, + "grad_norm": 0.9003845453262329, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.0374, + "step": 70 + }, + { + "epoch": 0.07901234567901234, + "grad_norm": 1.4704982042312622, + "learning_rate": 1.970443349753695e-05, + "loss": 0.0226, + "step": 80 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 2.574737310409546, + "learning_rate": 2.2167487684729066e-05, + "loss": 0.0129, + "step": 90 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 5.429731369018555, + "learning_rate": 2.4630541871921184e-05, + "loss": 0.0239, + "step": 100 + }, + { + "epoch": 0.10864197530864197, + "grad_norm": 0.17149314284324646, + "learning_rate": 2.70935960591133e-05, + "loss": 0.0102, + "step": 110 + }, + { + "epoch": 0.11851851851851852, + "grad_norm": 0.3575906753540039, + "learning_rate": 2.9556650246305422e-05, + "loss": 0.0089, + "step": 120 + }, + { + "epoch": 0.12839506172839507, + "grad_norm": 0.30634525418281555, + "learning_rate": 3.2019704433497536e-05, + "loss": 0.0028, + "step": 130 + }, + { + "epoch": 0.1382716049382716, + "grad_norm": 0.006271605845540762, + "learning_rate": 3.4482758620689657e-05, + "loss": 0.0006, + "step": 140 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.01288707833737135, + "learning_rate": 3.694581280788178e-05, + "loss": 0.0008, + "step": 150 + }, + { + "epoch": 0.1580246913580247, + "grad_norm": 0.004254512023180723, + "learning_rate": 3.94088669950739e-05, + "loss": 0.0008, + "step": 160 + }, + { + "epoch": 0.16790123456790124, + "grad_norm": 0.017511000856757164, + "learning_rate": 4.187192118226601e-05, + "loss": 0.0051, + "step": 170 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.001938911504112184, + "learning_rate": 4.433497536945813e-05, + "loss": 0.0002, + "step": 180 + }, + { + "epoch": 0.18765432098765433, + "grad_norm": 0.5761703848838806, + "learning_rate": 4.679802955665025e-05, + "loss": 0.0005, + "step": 190 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 0.0074734254740178585, + "learning_rate": 4.926108374384237e-05, + "loss": 0.0003, + "step": 200 + }, + { + "epoch": 0.2074074074074074, + "grad_norm": 0.0016212465707212687, + "learning_rate": 4.9998177025185267e-05, + "loss": 0.0011, + "step": 210 + }, + { + "epoch": 0.21728395061728395, + "grad_norm": 0.0033307652920484543, + "learning_rate": 4.998924880895427e-05, + "loss": 0.0009, + "step": 220 + }, + { + "epoch": 0.2271604938271605, + "grad_norm": 0.009896264411509037, + "learning_rate": 4.997288317313464e-05, + "loss": 0.001, + "step": 230 + }, + { + "epoch": 0.23703703703703705, + "grad_norm": 0.00731613440439105, + "learning_rate": 4.994908498854508e-05, + "loss": 0.0021, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 0.01958666928112507, + "learning_rate": 4.991786133811494e-05, + "loss": 0.0024, + "step": 250 + }, + { + "epoch": 0.25679012345679014, + "grad_norm": 0.008584077470004559, + "learning_rate": 4.9879221514776196e-05, + "loss": 0.0002, + "step": 260 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.0017416627379134297, + "learning_rate": 4.983317701869765e-05, + "loss": 0.0001, + "step": 270 + }, + { + "epoch": 0.2765432098765432, + "grad_norm": 0.8311202526092529, + "learning_rate": 4.977974155386214e-05, + "loss": 0.0014, + "step": 280 + }, + { + "epoch": 0.28641975308641976, + "grad_norm": 0.06173387169837952, + "learning_rate": 4.9718931023987926e-05, + "loss": 0.0043, + "step": 290 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.011783963069319725, + "learning_rate": 4.9650763527795385e-05, + "loss": 0.0032, + "step": 300 + }, + { + "epoch": 0.30617283950617286, + "grad_norm": 0.0035617423709481955, + "learning_rate": 4.9575259353620305e-05, + "loss": 0.0067, + "step": 310 + }, + { + "epoch": 0.3160493827160494, + "grad_norm": 0.007925956510007381, + "learning_rate": 4.949244097337567e-05, + "loss": 0.002, + "step": 320 + }, + { + "epoch": 0.32592592592592595, + "grad_norm": 0.00402588676661253, + "learning_rate": 4.9402333035863344e-05, + "loss": 0.0005, + "step": 330 + }, + { + "epoch": 0.3358024691358025, + "grad_norm": 0.00444524921476841, + "learning_rate": 4.930496235943811e-05, + "loss": 0.0001, + "step": 340 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 0.004133024252951145, + "learning_rate": 4.9200357924025755e-05, + "loss": 0.001, + "step": 350 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.0026975509244948626, + "learning_rate": 4.9088550862497966e-05, + "loss": 0.0042, + "step": 360 + }, + { + "epoch": 0.3654320987654321, + "grad_norm": 0.0009604657534509897, + "learning_rate": 4.8969574451406445e-05, + "loss": 0.0004, + "step": 370 + }, + { + "epoch": 0.37530864197530867, + "grad_norm": 0.0007526307599619031, + "learning_rate": 4.8843464101078924e-05, + "loss": 0.0005, + "step": 380 + }, + { + "epoch": 0.3851851851851852, + "grad_norm": 0.034833673387765884, + "learning_rate": 4.871025734508022e-05, + "loss": 0.0002, + "step": 390 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 0.0006839093985036016, + "learning_rate": 4.85699938290413e-05, + "loss": 0.0011, + "step": 400 + }, + { + "epoch": 0.4049382716049383, + "grad_norm": 0.17423595488071442, + "learning_rate": 4.842271529885978e-05, + "loss": 0.0002, + "step": 410 + }, + { + "epoch": 0.4148148148148148, + "grad_norm": 0.0021370204631239176, + "learning_rate": 4.8268465588275235e-05, + "loss": 0.0021, + "step": 420 + }, + { + "epoch": 0.4246913580246914, + "grad_norm": 0.002740511205047369, + "learning_rate": 4.8107290605823306e-05, + "loss": 0.0007, + "step": 430 + }, + { + "epoch": 0.4345679012345679, + "grad_norm": 0.5583088397979736, + "learning_rate": 4.79392383211721e-05, + "loss": 0.001, + "step": 440 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.0016133144963532686, + "learning_rate": 4.776435875084526e-05, + "loss": 0.0001, + "step": 450 + }, + { + "epoch": 0.454320987654321, + "grad_norm": 0.008442943915724754, + "learning_rate": 4.7582703943335785e-05, + "loss": 0.0001, + "step": 460 + }, + { + "epoch": 0.4641975308641975, + "grad_norm": 0.0007343398174270988, + "learning_rate": 4.739432796361515e-05, + "loss": 0.0003, + "step": 470 + }, + { + "epoch": 0.4740740740740741, + "grad_norm": 0.0006774268113076687, + "learning_rate": 4.719928687704218e-05, + "loss": 0.0, + "step": 480 + }, + { + "epoch": 0.4839506172839506, + "grad_norm": 0.0005065005389042199, + "learning_rate": 4.699763873267667e-05, + "loss": 0.0, + "step": 490 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 0.0007372256950475276, + "learning_rate": 4.678944354600249e-05, + "loss": 0.0002, + "step": 500 + }, + { + "epoch": 0.49382716049382713, + "eval_nlpcc25_task1_dev_accuracy": 0.9935714285714285, + "eval_nlpcc25_task1_dev_loss": 0.038043826818466187, + "eval_nlpcc25_task1_dev_runtime": 3452.1571, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 500 + }, + { + "epoch": 0.5037037037037037, + "grad_norm": 0.00031303250580094755, + "learning_rate": 4.65747632810655e-05, + "loss": 0.0002, + "step": 510 + }, + { + "epoch": 0.5135802469135803, + "grad_norm": 0.0012328416341915727, + "learning_rate": 4.635366183203157e-05, + "loss": 0.0101, + "step": 520 + }, + { + "epoch": 0.5234567901234568, + "grad_norm": 0.0003338649112265557, + "learning_rate": 4.612620500417001e-05, + "loss": 0.0, + "step": 530 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.00036818900844082236, + "learning_rate": 4.589246049426835e-05, + "loss": 0.0, + "step": 540 + }, + { + "epoch": 0.5432098765432098, + "grad_norm": 0.0008916526567190886, + "learning_rate": 4.565249787048408e-05, + "loss": 0.0005, + "step": 550 + }, + { + "epoch": 0.5530864197530864, + "grad_norm": 0.001147187897004187, + "learning_rate": 4.5406388551639436e-05, + "loss": 0.0, + "step": 560 + }, + { + "epoch": 0.562962962962963, + "grad_norm": 0.00036947213811799884, + "learning_rate": 4.515420578596542e-05, + "loss": 0.0032, + "step": 570 + }, + { + "epoch": 0.5728395061728395, + "grad_norm": 0.0018035719403997064, + "learning_rate": 4.489602462930126e-05, + "loss": 0.0, + "step": 580 + }, + { + "epoch": 0.582716049382716, + "grad_norm": 0.0003090534301009029, + "learning_rate": 4.4631921922755985e-05, + "loss": 0.0, + "step": 590 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.0003804001025855541, + "learning_rate": 4.436197626983855e-05, + "loss": 0.0, + "step": 600 + }, + { + "epoch": 0.6024691358024692, + "grad_norm": 0.0002678770397324115, + "learning_rate": 4.4086268013063556e-05, + "loss": 0.0, + "step": 610 + }, + { + "epoch": 0.6123456790123457, + "grad_norm": 0.0003251763992011547, + "learning_rate": 4.3804879210039275e-05, + "loss": 0.0, + "step": 620 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 0.00025686624576337636, + "learning_rate": 4.351789360904527e-05, + "loss": 0.0, + "step": 630 + }, + { + "epoch": 0.6320987654320988, + "grad_norm": 0.00028061174089089036, + "learning_rate": 4.322539662410687e-05, + "loss": 0.0, + "step": 640 + }, + { + "epoch": 0.6419753086419753, + "grad_norm": 0.0002689982939045876, + "learning_rate": 4.29274753095738e-05, + "loss": 0.0005, + "step": 650 + }, + { + "epoch": 0.6518518518518519, + "grad_norm": 0.0013502618530765176, + "learning_rate": 4.262421833421069e-05, + "loss": 0.0016, + "step": 660 + }, + { + "epoch": 0.6617283950617284, + "grad_norm": 0.0007630003965459764, + "learning_rate": 4.2315715954807e-05, + "loss": 0.0007, + "step": 670 + }, + { + "epoch": 0.671604938271605, + "grad_norm": 0.0010010383557528257, + "learning_rate": 4.200205998931442e-05, + "loss": 0.0, + "step": 680 + }, + { + "epoch": 0.6814814814814815, + "grad_norm": 0.0028828333597630262, + "learning_rate": 4.1683343789519544e-05, + "loss": 0.0, + "step": 690 + }, + { + "epoch": 0.691358024691358, + "grad_norm": 0.0005191878299228847, + "learning_rate": 4.135966221326007e-05, + "loss": 0.0002, + "step": 700 + }, + { + "epoch": 0.7012345679012346, + "grad_norm": 0.0010550885926932096, + "learning_rate": 4.103111159619274e-05, + "loss": 0.0, + "step": 710 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 0.00028645008569583297, + "learning_rate": 4.0697789723121485e-05, + "loss": 0.0024, + "step": 720 + }, + { + "epoch": 0.7209876543209877, + "grad_norm": 0.00031866817153058946, + "learning_rate": 4.035979579889424e-05, + "loss": 0.0, + "step": 730 + }, + { + "epoch": 0.7308641975308642, + "grad_norm": 0.0003414931707084179, + "learning_rate": 4.001723041887713e-05, + "loss": 0.0, + "step": 740 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.00028110420680604875, + "learning_rate": 3.967019553901477e-05, + "loss": 0.0, + "step": 750 + }, + { + "epoch": 0.7506172839506173, + "grad_norm": 0.0002813187020365149, + "learning_rate": 3.931879444548568e-05, + "loss": 0.0, + "step": 760 + }, + { + "epoch": 0.7604938271604939, + "grad_norm": 0.00022827104839961976, + "learning_rate": 3.8963131723961734e-05, + "loss": 0.0, + "step": 770 + }, + { + "epoch": 0.7703703703703704, + "grad_norm": 0.00034623872488737106, + "learning_rate": 3.860331322848091e-05, + "loss": 0.0, + "step": 780 + }, + { + "epoch": 0.7802469135802469, + "grad_norm": 0.000275197351584211, + "learning_rate": 3.823944604994243e-05, + "loss": 0.0, + "step": 790 + }, + { + "epoch": 0.7901234567901234, + "grad_norm": 0.0003900310257449746, + "learning_rate": 3.7871638484233966e-05, + "loss": 0.0016, + "step": 800 + }, + { + "epoch": 0.8, + "grad_norm": 0.005876209121197462, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0021, + "step": 810 + }, + { + "epoch": 0.8098765432098766, + "grad_norm": 0.10590516775846481, + "learning_rate": 3.71246412060613e-05, + "loss": 0.0004, + "step": 820 + }, + { + "epoch": 0.8197530864197531, + "grad_norm": 0.010082660242915154, + "learning_rate": 3.674567381849498e-05, + "loss": 0.0001, + "step": 830 + }, + { + "epoch": 0.8296296296296296, + "grad_norm": 0.0013996075140312314, + "learning_rate": 3.6363210627385004e-05, + "loss": 0.0001, + "step": 840 + }, + { + "epoch": 0.8395061728395061, + "grad_norm": 0.0020888964645564556, + "learning_rate": 3.59773654632531e-05, + "loss": 0.0001, + "step": 850 + }, + { + "epoch": 0.8493827160493828, + "grad_norm": 0.0007959024515002966, + "learning_rate": 3.558825316317998e-05, + "loss": 0.0, + "step": 860 + }, + { + "epoch": 0.8592592592592593, + "grad_norm": 0.0046511865220963955, + "learning_rate": 3.5195989536626925e-05, + "loss": 0.0, + "step": 870 + }, + { + "epoch": 0.8691358024691358, + "grad_norm": 0.3736809194087982, + "learning_rate": 3.4800691330968064e-05, + "loss": 0.0057, + "step": 880 + }, + { + "epoch": 0.8790123456790123, + "grad_norm": 0.003749604569748044, + "learning_rate": 3.440247619674347e-05, + "loss": 0.0, + "step": 890 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.0007721478468738496, + "learning_rate": 3.400146265264341e-05, + "loss": 0.0002, + "step": 900 + }, + { + "epoch": 0.8987654320987655, + "grad_norm": 0.000859771971590817, + "learning_rate": 3.359777005023428e-05, + "loss": 0.0, + "step": 910 + }, + { + "epoch": 0.908641975308642, + "grad_norm": 0.0005010567838326097, + "learning_rate": 3.3191518538436596e-05, + "loss": 0.0, + "step": 920 + }, + { + "epoch": 0.9185185185185185, + "grad_norm": 0.00021397744421847165, + "learning_rate": 3.278282902776569e-05, + "loss": 0.0, + "step": 930 + }, + { + "epoch": 0.928395061728395, + "grad_norm": 0.0004123291582800448, + "learning_rate": 3.237182315434582e-05, + "loss": 0.0001, + "step": 940 + }, + { + "epoch": 0.9382716049382716, + "grad_norm": 0.0003536223666742444, + "learning_rate": 3.195862324370812e-05, + "loss": 0.0001, + "step": 950 + }, + { + "epoch": 0.9481481481481482, + "grad_norm": 0.0007978660287335515, + "learning_rate": 3.154335227438362e-05, + "loss": 0.0, + "step": 960 + }, + { + "epoch": 0.9580246913580247, + "grad_norm": 0.0002476648660376668, + "learning_rate": 3.112613384130168e-05, + "loss": 0.0, + "step": 970 + }, + { + "epoch": 0.9679012345679012, + "grad_norm": 0.001542024314403534, + "learning_rate": 3.0707092119005155e-05, + "loss": 0.0, + "step": 980 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 0.0001304072793573141, + "learning_rate": 3.028635182469294e-05, + "loss": 0.0, + "step": 990 + }, + { + "epoch": 0.9876543209876543, + "grad_norm": 0.0019521948415786028, + "learning_rate": 2.9864038181101046e-05, + "loss": 0.0, + "step": 1000 + }, + { + "epoch": 0.9876543209876543, + "eval_nlpcc25_task1_dev_accuracy": 0.9957792207792208, + "eval_nlpcc25_task1_dev_loss": 0.025204554200172424, + "eval_nlpcc25_task1_dev_runtime": 3451.9671, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1000 + }, + { + "epoch": 0.9975308641975309, + "grad_norm": 0.0017585157183930278, + "learning_rate": 2.9440276879233197e-05, + "loss": 0.0, + "step": 1010 + }, + { + "epoch": 1.0069135802469136, + "grad_norm": 0.00013237106031738222, + "learning_rate": 2.9015194040952105e-05, + "loss": 0.0, + "step": 1020 + }, + { + "epoch": 1.0167901234567902, + "grad_norm": 0.0001692405203357339, + "learning_rate": 2.858891618144246e-05, + "loss": 0.0, + "step": 1030 + }, + { + "epoch": 1.0266666666666666, + "grad_norm": 0.00012385584705043584, + "learning_rate": 2.8161570171556867e-05, + "loss": 0.0, + "step": 1040 + }, + { + "epoch": 1.0365432098765432, + "grad_norm": 0.00046954487334005535, + "learning_rate": 2.7733283200055966e-05, + "loss": 0.0, + "step": 1050 + }, + { + "epoch": 1.0464197530864197, + "grad_norm": 0.000127663035527803, + "learning_rate": 2.7304182735753864e-05, + "loss": 0.0, + "step": 1060 + }, + { + "epoch": 1.0562962962962963, + "grad_norm": 0.00011920407268917188, + "learning_rate": 2.68743964895803e-05, + "loss": 0.0, + "step": 1070 + }, + { + "epoch": 1.066172839506173, + "grad_norm": 0.000622686231508851, + "learning_rate": 2.6444052376570677e-05, + "loss": 0.0, + "step": 1080 + }, + { + "epoch": 1.0760493827160493, + "grad_norm": 0.00012072878598701209, + "learning_rate": 2.60132784777954e-05, + "loss": 0.0, + "step": 1090 + }, + { + "epoch": 1.085925925925926, + "grad_norm": 0.00013644102727994323, + "learning_rate": 2.5582203002239757e-05, + "loss": 0.0, + "step": 1100 + }, + { + "epoch": 1.0958024691358024, + "grad_norm": 0.00014129135524854064, + "learning_rate": 2.515095424864577e-05, + "loss": 0.0, + "step": 1110 + }, + { + "epoch": 1.105679012345679, + "grad_norm": 0.00017698659212328494, + "learning_rate": 2.471966056732728e-05, + "loss": 0.0, + "step": 1120 + }, + { + "epoch": 1.1155555555555556, + "grad_norm": 0.00010272293002344668, + "learning_rate": 2.4288450321969752e-05, + "loss": 0.0, + "step": 1130 + }, + { + "epoch": 1.125432098765432, + "grad_norm": 0.00011754959268728271, + "learning_rate": 2.385745185142603e-05, + "loss": 0.0, + "step": 1140 + }, + { + "epoch": 1.1353086419753087, + "grad_norm": 0.00014159196871332824, + "learning_rate": 2.3426793431519524e-05, + "loss": 0.0, + "step": 1150 + }, + { + "epoch": 1.145185185185185, + "grad_norm": 0.00012516119750216603, + "learning_rate": 2.2996603236866168e-05, + "loss": 0.0, + "step": 1160 + }, + { + "epoch": 1.1550617283950617, + "grad_norm": 0.0001459266641177237, + "learning_rate": 2.2567009302726442e-05, + "loss": 0.0, + "step": 1170 + }, + { + "epoch": 1.1649382716049383, + "grad_norm": 0.00011655504204099998, + "learning_rate": 2.2138139486898916e-05, + "loss": 0.0, + "step": 1180 + }, + { + "epoch": 1.1748148148148148, + "grad_norm": 0.0714588537812233, + "learning_rate": 2.171012143166663e-05, + "loss": 0.0002, + "step": 1190 + }, + { + "epoch": 1.1846913580246914, + "grad_norm": 0.00010159167140955105, + "learning_rate": 2.1283082525807554e-05, + "loss": 0.0, + "step": 1200 + }, + { + "epoch": 1.194567901234568, + "grad_norm": 0.00014003751857671887, + "learning_rate": 2.0857149866680555e-05, + "loss": 0.0, + "step": 1210 + }, + { + "epoch": 1.2044444444444444, + "grad_norm": 0.00010035983723355457, + "learning_rate": 2.043245022239806e-05, + "loss": 0.0, + "step": 1220 + }, + { + "epoch": 1.214320987654321, + "grad_norm": 9.884718747343868e-05, + "learning_rate": 2.000910999409672e-05, + "loss": 0.0, + "step": 1230 + }, + { + "epoch": 1.2241975308641975, + "grad_norm": 9.416105604032055e-05, + "learning_rate": 1.9587255178317327e-05, + "loss": 0.0, + "step": 1240 + }, + { + "epoch": 1.234074074074074, + "grad_norm": 0.0001408099487889558, + "learning_rate": 1.9167011329505064e-05, + "loss": 0.0015, + "step": 1250 + }, + { + "epoch": 1.2439506172839505, + "grad_norm": 9.506545029580593e-05, + "learning_rate": 1.8748503522641487e-05, + "loss": 0.0, + "step": 1260 + }, + { + "epoch": 1.2538271604938271, + "grad_norm": 9.596488962415606e-05, + "learning_rate": 1.8331856316019024e-05, + "loss": 0.0, + "step": 1270 + }, + { + "epoch": 1.2637037037037038, + "grad_norm": 0.00010258956899633631, + "learning_rate": 1.791719371416936e-05, + "loss": 0.0, + "step": 1280 + }, + { + "epoch": 1.2735802469135802, + "grad_norm": 9.748171578394249e-05, + "learning_rate": 1.7504639130956652e-05, + "loss": 0.0, + "step": 1290 + }, + { + "epoch": 1.2834567901234568, + "grad_norm": 8.836873894324526e-05, + "learning_rate": 1.7094315352846473e-05, + "loss": 0.0, + "step": 1300 + }, + { + "epoch": 1.2933333333333334, + "grad_norm": 9.162294736597687e-05, + "learning_rate": 1.6686344502361516e-05, + "loss": 0.0, + "step": 1310 + }, + { + "epoch": 1.3032098765432099, + "grad_norm": 9.847845649346709e-05, + "learning_rate": 1.6280848001734943e-05, + "loss": 0.0, + "step": 1320 + }, + { + "epoch": 1.3130864197530865, + "grad_norm": 0.002509386744350195, + "learning_rate": 1.5877946536772065e-05, + "loss": 0.0, + "step": 1330 + }, + { + "epoch": 1.322962962962963, + "grad_norm": 9.157544263871387e-05, + "learning_rate": 1.5477760020931302e-05, + "loss": 0.0, + "step": 1340 + }, + { + "epoch": 1.3328395061728395, + "grad_norm": 0.00010282491712132469, + "learning_rate": 1.5080407559634929e-05, + "loss": 0.0, + "step": 1350 + }, + { + "epoch": 1.342716049382716, + "grad_norm": 0.13170938193798065, + "learning_rate": 1.468600741482038e-05, + "loss": 0.0001, + "step": 1360 + }, + { + "epoch": 1.3525925925925926, + "grad_norm": 9.943981422111392e-05, + "learning_rate": 1.4294676969742571e-05, + "loss": 0.0, + "step": 1370 + }, + { + "epoch": 1.3624691358024692, + "grad_norm": 8.701250771991909e-05, + "learning_rate": 1.390653269403771e-05, + "loss": 0.0, + "step": 1380 + }, + { + "epoch": 1.3723456790123456, + "grad_norm": 9.3141570687294e-05, + "learning_rate": 1.3521690109059062e-05, + "loss": 0.0017, + "step": 1390 + }, + { + "epoch": 1.3822222222222222, + "grad_norm": 0.00011883996921824291, + "learning_rate": 1.3140263753494903e-05, + "loss": 0.0, + "step": 1400 + }, + { + "epoch": 1.3920987654320989, + "grad_norm": 9.17040160857141e-05, + "learning_rate": 1.276236714927902e-05, + "loss": 0.0, + "step": 1410 + }, + { + "epoch": 1.4019753086419753, + "grad_norm": 0.00010277926048729569, + "learning_rate": 1.2388112767803729e-05, + "loss": 0.0, + "step": 1420 + }, + { + "epoch": 1.411851851851852, + "grad_norm": 8.92517709871754e-05, + "learning_rate": 1.2017611996445644e-05, + "loss": 0.0, + "step": 1430 + }, + { + "epoch": 1.4217283950617283, + "grad_norm": 8.916323713492602e-05, + "learning_rate": 1.1650975105413981e-05, + "loss": 0.0, + "step": 1440 + }, + { + "epoch": 1.431604938271605, + "grad_norm": 8.675019489601254e-05, + "learning_rate": 1.1288311214931446e-05, + "loss": 0.0, + "step": 1450 + }, + { + "epoch": 1.4414814814814814, + "grad_norm": 9.536254219710827e-05, + "learning_rate": 1.092972826275735e-05, + "loss": 0.0, + "step": 1460 + }, + { + "epoch": 1.451358024691358, + "grad_norm": 9.104371565626934e-05, + "learning_rate": 1.057533297206263e-05, + "loss": 0.0, + "step": 1470 + }, + { + "epoch": 1.4612345679012346, + "grad_norm": 9.839163249125704e-05, + "learning_rate": 1.0225230819666431e-05, + "loss": 0.0, + "step": 1480 + }, + { + "epoch": 1.471111111111111, + "grad_norm": 8.303455979330465e-05, + "learning_rate": 9.879526004643586e-06, + "loss": 0.0, + "step": 1490 + }, + { + "epoch": 1.4809876543209877, + "grad_norm": 9.03740365174599e-05, + "learning_rate": 9.538321417312351e-06, + "loss": 0.0, + "step": 1500 + }, + { + "epoch": 1.4809876543209877, + "eval_nlpcc25_task1_dev_accuracy": 0.9967857142857143, + "eval_nlpcc25_task1_dev_loss": 0.020024757832288742, + "eval_nlpcc25_task1_dev_runtime": 3451.5414, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1500 + } + ], + "logging_steps": 10, + "max_steps": 2024, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.250241852975612e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/training_args.bin b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dca977cd6736713fb5f4fb68d38053f8e5f452eb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95434493472159610d4c2dd6dc378ebbfd2ed878eb7c793a26ac8ce723b446c0 +size 5752 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/README.md b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2b3b3f81cc13b36bc8025f54bbde88be77c46fd5 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/README.md @@ -0,0 +1,202 @@ +--- +base_model: /data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/adapter_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5914f072a289e8f8e163cee6ef6f15c727e47c85 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "q_proj", + "up_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/adapter_model.safetensors b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e194e88574e49b3621bc4d3af1e896f5578bcd5 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c000ca9e439be4c7cebf33af93834497a66bb06b7493e9d6df5e614c20400f6 +size 83945296 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/optimizer.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7232ba21b74420b65a6c0d88122597669e744760 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cfcf9d778d7aac9c7cd36500c650da4dcb6d522095c36157e76a5f592e9d14e +size 168149074 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/rng_state.pth b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..41dfa7d7903dea42d227bad638c2c750928d590c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2 +size 14244 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/scheduler.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..855f5595efd2705b36e76a5775e1be388ddf5d9e --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae7c40cd0f1ca6a41ad8ec9a539f7c6cbf2c7be18ade7ef81f27bb15e3de4af +size 1064 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/special_tokens_map.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..df5c3a478b842fa66e6a8c10265478284c1d4f41 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/special_tokens_map.json @@ -0,0 +1,33 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/tokenizer.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/tokenizer_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60ca9fbea85debf5fb15193ad8ef61d682f121c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/trainer_state.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d09e0c7a665654d45a7ac142ba4911812d5f0dc1 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/trainer_state.json @@ -0,0 +1,1470 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9748148148148148, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009876543209876543, + "grad_norm": 8.173213958740234, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.3879, + "step": 10 + }, + { + "epoch": 0.019753086419753086, + "grad_norm": 7.614120960235596, + "learning_rate": 4.926108374384237e-06, + "loss": 1.3407, + "step": 20 + }, + { + "epoch": 0.02962962962962963, + "grad_norm": 6.4321064949035645, + "learning_rate": 7.3891625615763555e-06, + "loss": 1.1236, + "step": 30 + }, + { + "epoch": 0.03950617283950617, + "grad_norm": 2.8025548458099365, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5763, + "step": 40 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 0.7712829113006592, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.1632, + "step": 50 + }, + { + "epoch": 0.05925925925925926, + "grad_norm": 0.35205233097076416, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.0571, + "step": 60 + }, + { + "epoch": 0.0691358024691358, + "grad_norm": 0.9003845453262329, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.0374, + "step": 70 + }, + { + "epoch": 0.07901234567901234, + "grad_norm": 1.4704982042312622, + "learning_rate": 1.970443349753695e-05, + "loss": 0.0226, + "step": 80 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 2.574737310409546, + "learning_rate": 2.2167487684729066e-05, + "loss": 0.0129, + "step": 90 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 5.429731369018555, + "learning_rate": 2.4630541871921184e-05, + "loss": 0.0239, + "step": 100 + }, + { + "epoch": 0.10864197530864197, + "grad_norm": 0.17149314284324646, + "learning_rate": 2.70935960591133e-05, + "loss": 0.0102, + "step": 110 + }, + { + "epoch": 0.11851851851851852, + "grad_norm": 0.3575906753540039, + "learning_rate": 2.9556650246305422e-05, + "loss": 0.0089, + "step": 120 + }, + { + "epoch": 0.12839506172839507, + "grad_norm": 0.30634525418281555, + "learning_rate": 3.2019704433497536e-05, + "loss": 0.0028, + "step": 130 + }, + { + "epoch": 0.1382716049382716, + "grad_norm": 0.006271605845540762, + "learning_rate": 3.4482758620689657e-05, + "loss": 0.0006, + "step": 140 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.01288707833737135, + "learning_rate": 3.694581280788178e-05, + "loss": 0.0008, + "step": 150 + }, + { + "epoch": 0.1580246913580247, + "grad_norm": 0.004254512023180723, + "learning_rate": 3.94088669950739e-05, + "loss": 0.0008, + "step": 160 + }, + { + "epoch": 0.16790123456790124, + "grad_norm": 0.017511000856757164, + "learning_rate": 4.187192118226601e-05, + "loss": 0.0051, + "step": 170 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.001938911504112184, + "learning_rate": 4.433497536945813e-05, + "loss": 0.0002, + "step": 180 + }, + { + "epoch": 0.18765432098765433, + "grad_norm": 0.5761703848838806, + "learning_rate": 4.679802955665025e-05, + "loss": 0.0005, + "step": 190 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 0.0074734254740178585, + "learning_rate": 4.926108374384237e-05, + "loss": 0.0003, + "step": 200 + }, + { + "epoch": 0.2074074074074074, + "grad_norm": 0.0016212465707212687, + "learning_rate": 4.9998177025185267e-05, + "loss": 0.0011, + "step": 210 + }, + { + "epoch": 0.21728395061728395, + "grad_norm": 0.0033307652920484543, + "learning_rate": 4.998924880895427e-05, + "loss": 0.0009, + "step": 220 + }, + { + "epoch": 0.2271604938271605, + "grad_norm": 0.009896264411509037, + "learning_rate": 4.997288317313464e-05, + "loss": 0.001, + "step": 230 + }, + { + "epoch": 0.23703703703703705, + "grad_norm": 0.00731613440439105, + "learning_rate": 4.994908498854508e-05, + "loss": 0.0021, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 0.01958666928112507, + "learning_rate": 4.991786133811494e-05, + "loss": 0.0024, + "step": 250 + }, + { + "epoch": 0.25679012345679014, + "grad_norm": 0.008584077470004559, + "learning_rate": 4.9879221514776196e-05, + "loss": 0.0002, + "step": 260 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.0017416627379134297, + "learning_rate": 4.983317701869765e-05, + "loss": 0.0001, + "step": 270 + }, + { + "epoch": 0.2765432098765432, + "grad_norm": 0.8311202526092529, + "learning_rate": 4.977974155386214e-05, + "loss": 0.0014, + "step": 280 + }, + { + "epoch": 0.28641975308641976, + "grad_norm": 0.06173387169837952, + "learning_rate": 4.9718931023987926e-05, + "loss": 0.0043, + "step": 290 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.011783963069319725, + "learning_rate": 4.9650763527795385e-05, + "loss": 0.0032, + "step": 300 + }, + { + "epoch": 0.30617283950617286, + "grad_norm": 0.0035617423709481955, + "learning_rate": 4.9575259353620305e-05, + "loss": 0.0067, + "step": 310 + }, + { + "epoch": 0.3160493827160494, + "grad_norm": 0.007925956510007381, + "learning_rate": 4.949244097337567e-05, + "loss": 0.002, + "step": 320 + }, + { + "epoch": 0.32592592592592595, + "grad_norm": 0.00402588676661253, + "learning_rate": 4.9402333035863344e-05, + "loss": 0.0005, + "step": 330 + }, + { + "epoch": 0.3358024691358025, + "grad_norm": 0.00444524921476841, + "learning_rate": 4.930496235943811e-05, + "loss": 0.0001, + "step": 340 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 0.004133024252951145, + "learning_rate": 4.9200357924025755e-05, + "loss": 0.001, + "step": 350 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.0026975509244948626, + "learning_rate": 4.9088550862497966e-05, + "loss": 0.0042, + "step": 360 + }, + { + "epoch": 0.3654320987654321, + "grad_norm": 0.0009604657534509897, + "learning_rate": 4.8969574451406445e-05, + "loss": 0.0004, + "step": 370 + }, + { + "epoch": 0.37530864197530867, + "grad_norm": 0.0007526307599619031, + "learning_rate": 4.8843464101078924e-05, + "loss": 0.0005, + "step": 380 + }, + { + "epoch": 0.3851851851851852, + "grad_norm": 0.034833673387765884, + "learning_rate": 4.871025734508022e-05, + "loss": 0.0002, + "step": 390 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 0.0006839093985036016, + "learning_rate": 4.85699938290413e-05, + "loss": 0.0011, + "step": 400 + }, + { + "epoch": 0.4049382716049383, + "grad_norm": 0.17423595488071442, + "learning_rate": 4.842271529885978e-05, + "loss": 0.0002, + "step": 410 + }, + { + "epoch": 0.4148148148148148, + "grad_norm": 0.0021370204631239176, + "learning_rate": 4.8268465588275235e-05, + "loss": 0.0021, + "step": 420 + }, + { + "epoch": 0.4246913580246914, + "grad_norm": 0.002740511205047369, + "learning_rate": 4.8107290605823306e-05, + "loss": 0.0007, + "step": 430 + }, + { + "epoch": 0.4345679012345679, + "grad_norm": 0.5583088397979736, + "learning_rate": 4.79392383211721e-05, + "loss": 0.001, + "step": 440 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.0016133144963532686, + "learning_rate": 4.776435875084526e-05, + "loss": 0.0001, + "step": 450 + }, + { + "epoch": 0.454320987654321, + "grad_norm": 0.008442943915724754, + "learning_rate": 4.7582703943335785e-05, + "loss": 0.0001, + "step": 460 + }, + { + "epoch": 0.4641975308641975, + "grad_norm": 0.0007343398174270988, + "learning_rate": 4.739432796361515e-05, + "loss": 0.0003, + "step": 470 + }, + { + "epoch": 0.4740740740740741, + "grad_norm": 0.0006774268113076687, + "learning_rate": 4.719928687704218e-05, + "loss": 0.0, + "step": 480 + }, + { + "epoch": 0.4839506172839506, + "grad_norm": 0.0005065005389042199, + "learning_rate": 4.699763873267667e-05, + "loss": 0.0, + "step": 490 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 0.0007372256950475276, + "learning_rate": 4.678944354600249e-05, + "loss": 0.0002, + "step": 500 + }, + { + "epoch": 0.49382716049382713, + "eval_nlpcc25_task1_dev_accuracy": 0.9935714285714285, + "eval_nlpcc25_task1_dev_loss": 0.038043826818466187, + "eval_nlpcc25_task1_dev_runtime": 3452.1571, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 500 + }, + { + "epoch": 0.5037037037037037, + "grad_norm": 0.00031303250580094755, + "learning_rate": 4.65747632810655e-05, + "loss": 0.0002, + "step": 510 + }, + { + "epoch": 0.5135802469135803, + "grad_norm": 0.0012328416341915727, + "learning_rate": 4.635366183203157e-05, + "loss": 0.0101, + "step": 520 + }, + { + "epoch": 0.5234567901234568, + "grad_norm": 0.0003338649112265557, + "learning_rate": 4.612620500417001e-05, + "loss": 0.0, + "step": 530 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.00036818900844082236, + "learning_rate": 4.589246049426835e-05, + "loss": 0.0, + "step": 540 + }, + { + "epoch": 0.5432098765432098, + "grad_norm": 0.0008916526567190886, + "learning_rate": 4.565249787048408e-05, + "loss": 0.0005, + "step": 550 + }, + { + "epoch": 0.5530864197530864, + "grad_norm": 0.001147187897004187, + "learning_rate": 4.5406388551639436e-05, + "loss": 0.0, + "step": 560 + }, + { + "epoch": 0.562962962962963, + "grad_norm": 0.00036947213811799884, + "learning_rate": 4.515420578596542e-05, + "loss": 0.0032, + "step": 570 + }, + { + "epoch": 0.5728395061728395, + "grad_norm": 0.0018035719403997064, + "learning_rate": 4.489602462930126e-05, + "loss": 0.0, + "step": 580 + }, + { + "epoch": 0.582716049382716, + "grad_norm": 0.0003090534301009029, + "learning_rate": 4.4631921922755985e-05, + "loss": 0.0, + "step": 590 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.0003804001025855541, + "learning_rate": 4.436197626983855e-05, + "loss": 0.0, + "step": 600 + }, + { + "epoch": 0.6024691358024692, + "grad_norm": 0.0002678770397324115, + "learning_rate": 4.4086268013063556e-05, + "loss": 0.0, + "step": 610 + }, + { + "epoch": 0.6123456790123457, + "grad_norm": 0.0003251763992011547, + "learning_rate": 4.3804879210039275e-05, + "loss": 0.0, + "step": 620 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 0.00025686624576337636, + "learning_rate": 4.351789360904527e-05, + "loss": 0.0, + "step": 630 + }, + { + "epoch": 0.6320987654320988, + "grad_norm": 0.00028061174089089036, + "learning_rate": 4.322539662410687e-05, + "loss": 0.0, + "step": 640 + }, + { + "epoch": 0.6419753086419753, + "grad_norm": 0.0002689982939045876, + "learning_rate": 4.29274753095738e-05, + "loss": 0.0005, + "step": 650 + }, + { + "epoch": 0.6518518518518519, + "grad_norm": 0.0013502618530765176, + "learning_rate": 4.262421833421069e-05, + "loss": 0.0016, + "step": 660 + }, + { + "epoch": 0.6617283950617284, + "grad_norm": 0.0007630003965459764, + "learning_rate": 4.2315715954807e-05, + "loss": 0.0007, + "step": 670 + }, + { + "epoch": 0.671604938271605, + "grad_norm": 0.0010010383557528257, + "learning_rate": 4.200205998931442e-05, + "loss": 0.0, + "step": 680 + }, + { + "epoch": 0.6814814814814815, + "grad_norm": 0.0028828333597630262, + "learning_rate": 4.1683343789519544e-05, + "loss": 0.0, + "step": 690 + }, + { + "epoch": 0.691358024691358, + "grad_norm": 0.0005191878299228847, + "learning_rate": 4.135966221326007e-05, + "loss": 0.0002, + "step": 700 + }, + { + "epoch": 0.7012345679012346, + "grad_norm": 0.0010550885926932096, + "learning_rate": 4.103111159619274e-05, + "loss": 0.0, + "step": 710 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 0.00028645008569583297, + "learning_rate": 4.0697789723121485e-05, + "loss": 0.0024, + "step": 720 + }, + { + "epoch": 0.7209876543209877, + "grad_norm": 0.00031866817153058946, + "learning_rate": 4.035979579889424e-05, + "loss": 0.0, + "step": 730 + }, + { + "epoch": 0.7308641975308642, + "grad_norm": 0.0003414931707084179, + "learning_rate": 4.001723041887713e-05, + "loss": 0.0, + "step": 740 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.00028110420680604875, + "learning_rate": 3.967019553901477e-05, + "loss": 0.0, + "step": 750 + }, + { + "epoch": 0.7506172839506173, + "grad_norm": 0.0002813187020365149, + "learning_rate": 3.931879444548568e-05, + "loss": 0.0, + "step": 760 + }, + { + "epoch": 0.7604938271604939, + "grad_norm": 0.00022827104839961976, + "learning_rate": 3.8963131723961734e-05, + "loss": 0.0, + "step": 770 + }, + { + "epoch": 0.7703703703703704, + "grad_norm": 0.00034623872488737106, + "learning_rate": 3.860331322848091e-05, + "loss": 0.0, + "step": 780 + }, + { + "epoch": 0.7802469135802469, + "grad_norm": 0.000275197351584211, + "learning_rate": 3.823944604994243e-05, + "loss": 0.0, + "step": 790 + }, + { + "epoch": 0.7901234567901234, + "grad_norm": 0.0003900310257449746, + "learning_rate": 3.7871638484233966e-05, + "loss": 0.0016, + "step": 800 + }, + { + "epoch": 0.8, + "grad_norm": 0.005876209121197462, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0021, + "step": 810 + }, + { + "epoch": 0.8098765432098766, + "grad_norm": 0.10590516775846481, + "learning_rate": 3.71246412060613e-05, + "loss": 0.0004, + "step": 820 + }, + { + "epoch": 0.8197530864197531, + "grad_norm": 0.010082660242915154, + "learning_rate": 3.674567381849498e-05, + "loss": 0.0001, + "step": 830 + }, + { + "epoch": 0.8296296296296296, + "grad_norm": 0.0013996075140312314, + "learning_rate": 3.6363210627385004e-05, + "loss": 0.0001, + "step": 840 + }, + { + "epoch": 0.8395061728395061, + "grad_norm": 0.0020888964645564556, + "learning_rate": 3.59773654632531e-05, + "loss": 0.0001, + "step": 850 + }, + { + "epoch": 0.8493827160493828, + "grad_norm": 0.0007959024515002966, + "learning_rate": 3.558825316317998e-05, + "loss": 0.0, + "step": 860 + }, + { + "epoch": 0.8592592592592593, + "grad_norm": 0.0046511865220963955, + "learning_rate": 3.5195989536626925e-05, + "loss": 0.0, + "step": 870 + }, + { + "epoch": 0.8691358024691358, + "grad_norm": 0.3736809194087982, + "learning_rate": 3.4800691330968064e-05, + "loss": 0.0057, + "step": 880 + }, + { + "epoch": 0.8790123456790123, + "grad_norm": 0.003749604569748044, + "learning_rate": 3.440247619674347e-05, + "loss": 0.0, + "step": 890 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.0007721478468738496, + "learning_rate": 3.400146265264341e-05, + "loss": 0.0002, + "step": 900 + }, + { + "epoch": 0.8987654320987655, + "grad_norm": 0.000859771971590817, + "learning_rate": 3.359777005023428e-05, + "loss": 0.0, + "step": 910 + }, + { + "epoch": 0.908641975308642, + "grad_norm": 0.0005010567838326097, + "learning_rate": 3.3191518538436596e-05, + "loss": 0.0, + "step": 920 + }, + { + "epoch": 0.9185185185185185, + "grad_norm": 0.00021397744421847165, + "learning_rate": 3.278282902776569e-05, + "loss": 0.0, + "step": 930 + }, + { + "epoch": 0.928395061728395, + "grad_norm": 0.0004123291582800448, + "learning_rate": 3.237182315434582e-05, + "loss": 0.0001, + "step": 940 + }, + { + "epoch": 0.9382716049382716, + "grad_norm": 0.0003536223666742444, + "learning_rate": 3.195862324370812e-05, + "loss": 0.0001, + "step": 950 + }, + { + "epoch": 0.9481481481481482, + "grad_norm": 0.0007978660287335515, + "learning_rate": 3.154335227438362e-05, + "loss": 0.0, + "step": 960 + }, + { + "epoch": 0.9580246913580247, + "grad_norm": 0.0002476648660376668, + "learning_rate": 3.112613384130168e-05, + "loss": 0.0, + "step": 970 + }, + { + "epoch": 0.9679012345679012, + "grad_norm": 0.001542024314403534, + "learning_rate": 3.0707092119005155e-05, + "loss": 0.0, + "step": 980 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 0.0001304072793573141, + "learning_rate": 3.028635182469294e-05, + "loss": 0.0, + "step": 990 + }, + { + "epoch": 0.9876543209876543, + "grad_norm": 0.0019521948415786028, + "learning_rate": 2.9864038181101046e-05, + "loss": 0.0, + "step": 1000 + }, + { + "epoch": 0.9876543209876543, + "eval_nlpcc25_task1_dev_accuracy": 0.9957792207792208, + "eval_nlpcc25_task1_dev_loss": 0.025204554200172424, + "eval_nlpcc25_task1_dev_runtime": 3451.9671, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1000 + }, + { + "epoch": 0.9975308641975309, + "grad_norm": 0.0017585157183930278, + "learning_rate": 2.9440276879233197e-05, + "loss": 0.0, + "step": 1010 + }, + { + "epoch": 1.0069135802469136, + "grad_norm": 0.00013237106031738222, + "learning_rate": 2.9015194040952105e-05, + "loss": 0.0, + "step": 1020 + }, + { + "epoch": 1.0167901234567902, + "grad_norm": 0.0001692405203357339, + "learning_rate": 2.858891618144246e-05, + "loss": 0.0, + "step": 1030 + }, + { + "epoch": 1.0266666666666666, + "grad_norm": 0.00012385584705043584, + "learning_rate": 2.8161570171556867e-05, + "loss": 0.0, + "step": 1040 + }, + { + "epoch": 1.0365432098765432, + "grad_norm": 0.00046954487334005535, + "learning_rate": 2.7733283200055966e-05, + "loss": 0.0, + "step": 1050 + }, + { + "epoch": 1.0464197530864197, + "grad_norm": 0.000127663035527803, + "learning_rate": 2.7304182735753864e-05, + "loss": 0.0, + "step": 1060 + }, + { + "epoch": 1.0562962962962963, + "grad_norm": 0.00011920407268917188, + "learning_rate": 2.68743964895803e-05, + "loss": 0.0, + "step": 1070 + }, + { + "epoch": 1.066172839506173, + "grad_norm": 0.000622686231508851, + "learning_rate": 2.6444052376570677e-05, + "loss": 0.0, + "step": 1080 + }, + { + "epoch": 1.0760493827160493, + "grad_norm": 0.00012072878598701209, + "learning_rate": 2.60132784777954e-05, + "loss": 0.0, + "step": 1090 + }, + { + "epoch": 1.085925925925926, + "grad_norm": 0.00013644102727994323, + "learning_rate": 2.5582203002239757e-05, + "loss": 0.0, + "step": 1100 + }, + { + "epoch": 1.0958024691358024, + "grad_norm": 0.00014129135524854064, + "learning_rate": 2.515095424864577e-05, + "loss": 0.0, + "step": 1110 + }, + { + "epoch": 1.105679012345679, + "grad_norm": 0.00017698659212328494, + "learning_rate": 2.471966056732728e-05, + "loss": 0.0, + "step": 1120 + }, + { + "epoch": 1.1155555555555556, + "grad_norm": 0.00010272293002344668, + "learning_rate": 2.4288450321969752e-05, + "loss": 0.0, + "step": 1130 + }, + { + "epoch": 1.125432098765432, + "grad_norm": 0.00011754959268728271, + "learning_rate": 2.385745185142603e-05, + "loss": 0.0, + "step": 1140 + }, + { + "epoch": 1.1353086419753087, + "grad_norm": 0.00014159196871332824, + "learning_rate": 2.3426793431519524e-05, + "loss": 0.0, + "step": 1150 + }, + { + "epoch": 1.145185185185185, + "grad_norm": 0.00012516119750216603, + "learning_rate": 2.2996603236866168e-05, + "loss": 0.0, + "step": 1160 + }, + { + "epoch": 1.1550617283950617, + "grad_norm": 0.0001459266641177237, + "learning_rate": 2.2567009302726442e-05, + "loss": 0.0, + "step": 1170 + }, + { + "epoch": 1.1649382716049383, + "grad_norm": 0.00011655504204099998, + "learning_rate": 2.2138139486898916e-05, + "loss": 0.0, + "step": 1180 + }, + { + "epoch": 1.1748148148148148, + "grad_norm": 0.0714588537812233, + "learning_rate": 2.171012143166663e-05, + "loss": 0.0002, + "step": 1190 + }, + { + "epoch": 1.1846913580246914, + "grad_norm": 0.00010159167140955105, + "learning_rate": 2.1283082525807554e-05, + "loss": 0.0, + "step": 1200 + }, + { + "epoch": 1.194567901234568, + "grad_norm": 0.00014003751857671887, + "learning_rate": 2.0857149866680555e-05, + "loss": 0.0, + "step": 1210 + }, + { + "epoch": 1.2044444444444444, + "grad_norm": 0.00010035983723355457, + "learning_rate": 2.043245022239806e-05, + "loss": 0.0, + "step": 1220 + }, + { + "epoch": 1.214320987654321, + "grad_norm": 9.884718747343868e-05, + "learning_rate": 2.000910999409672e-05, + "loss": 0.0, + "step": 1230 + }, + { + "epoch": 1.2241975308641975, + "grad_norm": 9.416105604032055e-05, + "learning_rate": 1.9587255178317327e-05, + "loss": 0.0, + "step": 1240 + }, + { + "epoch": 1.234074074074074, + "grad_norm": 0.0001408099487889558, + "learning_rate": 1.9167011329505064e-05, + "loss": 0.0015, + "step": 1250 + }, + { + "epoch": 1.2439506172839505, + "grad_norm": 9.506545029580593e-05, + "learning_rate": 1.8748503522641487e-05, + "loss": 0.0, + "step": 1260 + }, + { + "epoch": 1.2538271604938271, + "grad_norm": 9.596488962415606e-05, + "learning_rate": 1.8331856316019024e-05, + "loss": 0.0, + "step": 1270 + }, + { + "epoch": 1.2637037037037038, + "grad_norm": 0.00010258956899633631, + "learning_rate": 1.791719371416936e-05, + "loss": 0.0, + "step": 1280 + }, + { + "epoch": 1.2735802469135802, + "grad_norm": 9.748171578394249e-05, + "learning_rate": 1.7504639130956652e-05, + "loss": 0.0, + "step": 1290 + }, + { + "epoch": 1.2834567901234568, + "grad_norm": 8.836873894324526e-05, + "learning_rate": 1.7094315352846473e-05, + "loss": 0.0, + "step": 1300 + }, + { + "epoch": 1.2933333333333334, + "grad_norm": 9.162294736597687e-05, + "learning_rate": 1.6686344502361516e-05, + "loss": 0.0, + "step": 1310 + }, + { + "epoch": 1.3032098765432099, + "grad_norm": 9.847845649346709e-05, + "learning_rate": 1.6280848001734943e-05, + "loss": 0.0, + "step": 1320 + }, + { + "epoch": 1.3130864197530865, + "grad_norm": 0.002509386744350195, + "learning_rate": 1.5877946536772065e-05, + "loss": 0.0, + "step": 1330 + }, + { + "epoch": 1.322962962962963, + "grad_norm": 9.157544263871387e-05, + "learning_rate": 1.5477760020931302e-05, + "loss": 0.0, + "step": 1340 + }, + { + "epoch": 1.3328395061728395, + "grad_norm": 0.00010282491712132469, + "learning_rate": 1.5080407559634929e-05, + "loss": 0.0, + "step": 1350 + }, + { + "epoch": 1.342716049382716, + "grad_norm": 0.13170938193798065, + "learning_rate": 1.468600741482038e-05, + "loss": 0.0001, + "step": 1360 + }, + { + "epoch": 1.3525925925925926, + "grad_norm": 9.943981422111392e-05, + "learning_rate": 1.4294676969742571e-05, + "loss": 0.0, + "step": 1370 + }, + { + "epoch": 1.3624691358024692, + "grad_norm": 8.701250771991909e-05, + "learning_rate": 1.390653269403771e-05, + "loss": 0.0, + "step": 1380 + }, + { + "epoch": 1.3723456790123456, + "grad_norm": 9.3141570687294e-05, + "learning_rate": 1.3521690109059062e-05, + "loss": 0.0017, + "step": 1390 + }, + { + "epoch": 1.3822222222222222, + "grad_norm": 0.00011883996921824291, + "learning_rate": 1.3140263753494903e-05, + "loss": 0.0, + "step": 1400 + }, + { + "epoch": 1.3920987654320989, + "grad_norm": 9.17040160857141e-05, + "learning_rate": 1.276236714927902e-05, + "loss": 0.0, + "step": 1410 + }, + { + "epoch": 1.4019753086419753, + "grad_norm": 0.00010277926048729569, + "learning_rate": 1.2388112767803729e-05, + "loss": 0.0, + "step": 1420 + }, + { + "epoch": 1.411851851851852, + "grad_norm": 8.92517709871754e-05, + "learning_rate": 1.2017611996445644e-05, + "loss": 0.0, + "step": 1430 + }, + { + "epoch": 1.4217283950617283, + "grad_norm": 8.916323713492602e-05, + "learning_rate": 1.1650975105413981e-05, + "loss": 0.0, + "step": 1440 + }, + { + "epoch": 1.431604938271605, + "grad_norm": 8.675019489601254e-05, + "learning_rate": 1.1288311214931446e-05, + "loss": 0.0, + "step": 1450 + }, + { + "epoch": 1.4414814814814814, + "grad_norm": 9.536254219710827e-05, + "learning_rate": 1.092972826275735e-05, + "loss": 0.0, + "step": 1460 + }, + { + "epoch": 1.451358024691358, + "grad_norm": 9.104371565626934e-05, + "learning_rate": 1.057533297206263e-05, + "loss": 0.0, + "step": 1470 + }, + { + "epoch": 1.4612345679012346, + "grad_norm": 9.839163249125704e-05, + "learning_rate": 1.0225230819666431e-05, + "loss": 0.0, + "step": 1480 + }, + { + "epoch": 1.471111111111111, + "grad_norm": 8.303455979330465e-05, + "learning_rate": 9.879526004643586e-06, + "loss": 0.0, + "step": 1490 + }, + { + "epoch": 1.4809876543209877, + "grad_norm": 9.03740365174599e-05, + "learning_rate": 9.538321417312351e-06, + "loss": 0.0, + "step": 1500 + }, + { + "epoch": 1.4809876543209877, + "eval_nlpcc25_task1_dev_accuracy": 0.9967857142857143, + "eval_nlpcc25_task1_dev_loss": 0.020024757832288742, + "eval_nlpcc25_task1_dev_runtime": 3451.5414, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1500 + }, + { + "epoch": 1.4908641975308643, + "grad_norm": 8.933599019655958e-05, + "learning_rate": 9.201718608611729e-06, + "loss": 0.0, + "step": 1510 + }, + { + "epoch": 1.5007407407407407, + "grad_norm": 0.00010863789066206664, + "learning_rate": 8.869817759877388e-06, + "loss": 0.0, + "step": 1520 + }, + { + "epoch": 1.5106172839506171, + "grad_norm": 9.006269829114899e-05, + "learning_rate": 8.542717653025211e-06, + "loss": 0.0, + "step": 1530 + }, + { + "epoch": 1.520493827160494, + "grad_norm": 9.235177276423201e-05, + "learning_rate": 8.220515641151359e-06, + "loss": 0.0, + "step": 1540 + }, + { + "epoch": 1.5303703703703704, + "grad_norm": 9.809488983592018e-05, + "learning_rate": 7.90330761955756e-06, + "loss": 0.0, + "step": 1550 + }, + { + "epoch": 1.5402469135802468, + "grad_norm": 8.182202145690098e-05, + "learning_rate": 7.591187997210305e-06, + "loss": 0.0, + "step": 1560 + }, + { + "epoch": 1.5501234567901234, + "grad_norm": 8.401457307627425e-05, + "learning_rate": 7.284249668642404e-06, + "loss": 0.0, + "step": 1570 + }, + { + "epoch": 1.56, + "grad_norm": 8.956337114796042e-05, + "learning_rate": 6.9825839863052554e-06, + "loss": 0.0, + "step": 1580 + }, + { + "epoch": 1.5698765432098765, + "grad_norm": 0.00010167937580263242, + "learning_rate": 6.686280733380107e-06, + "loss": 0.0, + "step": 1590 + }, + { + "epoch": 1.579753086419753, + "grad_norm": 7.88121105870232e-05, + "learning_rate": 6.395428097056349e-06, + "loss": 0.0, + "step": 1600 + }, + { + "epoch": 1.5896296296296297, + "grad_norm": 8.662124309921637e-05, + "learning_rate": 6.110112642284829e-06, + "loss": 0.0, + "step": 1610 + }, + { + "epoch": 1.5995061728395061, + "grad_norm": 8.408601570408791e-05, + "learning_rate": 5.830419286013969e-06, + "loss": 0.0, + "step": 1620 + }, + { + "epoch": 1.6093827160493828, + "grad_norm": 8.535439701518044e-05, + "learning_rate": 5.5564312719163875e-06, + "loss": 0.0, + "step": 1630 + }, + { + "epoch": 1.6192592592592594, + "grad_norm": 9.110941755352542e-05, + "learning_rate": 5.288230145613498e-06, + "loss": 0.0, + "step": 1640 + }, + { + "epoch": 1.6291358024691358, + "grad_norm": 9.438098641112447e-05, + "learning_rate": 5.025895730405566e-06, + "loss": 0.0, + "step": 1650 + }, + { + "epoch": 1.6390123456790122, + "grad_norm": 9.488565410720184e-05, + "learning_rate": 4.769506103514232e-06, + "loss": 0.0, + "step": 1660 + }, + { + "epoch": 1.6488888888888888, + "grad_norm": 9.606828825781122e-05, + "learning_rate": 4.51913757284487e-06, + "loss": 0.0, + "step": 1670 + }, + { + "epoch": 1.6587654320987655, + "grad_norm": 8.396390330744907e-05, + "learning_rate": 4.274864654275438e-06, + "loss": 0.0, + "step": 1680 + }, + { + "epoch": 1.668641975308642, + "grad_norm": 9.46772561292164e-05, + "learning_rate": 4.036760049478697e-06, + "loss": 0.0, + "step": 1690 + }, + { + "epoch": 1.6785185185185185, + "grad_norm": 7.928520062705502e-05, + "learning_rate": 3.8048946242843995e-06, + "loss": 0.0, + "step": 1700 + }, + { + "epoch": 1.6883950617283952, + "grad_norm": 8.762006473261863e-05, + "learning_rate": 3.5793373875878806e-06, + "loss": 0.0, + "step": 1710 + }, + { + "epoch": 1.6982716049382716, + "grad_norm": 8.157498814398423e-05, + "learning_rate": 3.3601554708112713e-06, + "loss": 0.0, + "step": 1720 + }, + { + "epoch": 1.7081481481481482, + "grad_norm": 7.945331162773073e-05, + "learning_rate": 3.14741410792353e-06, + "loss": 0.0, + "step": 1730 + }, + { + "epoch": 1.7180246913580248, + "grad_norm": 8.328018884640187e-05, + "learning_rate": 2.941176616025215e-06, + "loss": 0.0006, + "step": 1740 + }, + { + "epoch": 1.7279012345679012, + "grad_norm": 8.189202344510704e-05, + "learning_rate": 2.741504376503737e-06, + "loss": 0.0, + "step": 1750 + }, + { + "epoch": 1.7377777777777776, + "grad_norm": 7.249087502714247e-05, + "learning_rate": 2.5484568167647245e-06, + "loss": 0.0, + "step": 1760 + }, + { + "epoch": 1.7476543209876543, + "grad_norm": 7.910580461611971e-05, + "learning_rate": 2.362091392544985e-06, + "loss": 0.0, + "step": 1770 + }, + { + "epoch": 1.757530864197531, + "grad_norm": 7.526679837610573e-05, + "learning_rate": 2.1824635708122267e-06, + "loss": 0.0, + "step": 1780 + }, + { + "epoch": 1.7674074074074073, + "grad_norm": 9.244989632861689e-05, + "learning_rate": 2.0096268132567183e-06, + "loss": 0.0, + "step": 1790 + }, + { + "epoch": 1.777283950617284, + "grad_norm": 7.878772157710046e-05, + "learning_rate": 1.843632560379785e-06, + "loss": 0.0, + "step": 1800 + }, + { + "epoch": 1.7871604938271606, + "grad_norm": 8.294432336697355e-05, + "learning_rate": 1.684530216183805e-06, + "loss": 0.0002, + "step": 1810 + }, + { + "epoch": 1.797037037037037, + "grad_norm": 8.14785817055963e-05, + "learning_rate": 1.5323671334684042e-06, + "loss": 0.0, + "step": 1820 + }, + { + "epoch": 1.8069135802469136, + "grad_norm": 7.684365846216679e-05, + "learning_rate": 1.3871885997370464e-06, + "loss": 0.0, + "step": 1830 + }, + { + "epoch": 1.8167901234567903, + "grad_norm": 7.302551966859028e-05, + "learning_rate": 1.2490378237183658e-06, + "loss": 0.0, + "step": 1840 + }, + { + "epoch": 1.8266666666666667, + "grad_norm": 8.272424020105973e-05, + "learning_rate": 1.1179559225061809e-06, + "loss": 0.0, + "step": 1850 + }, + { + "epoch": 1.836543209876543, + "grad_norm": 8.503070421284065e-05, + "learning_rate": 9.93981909322031e-07, + "loss": 0.0, + "step": 1860 + }, + { + "epoch": 1.8464197530864197, + "grad_norm": 9.047168714459985e-05, + "learning_rate": 8.771526819038644e-07, + "loss": 0.0, + "step": 1870 + }, + { + "epoch": 1.8562962962962963, + "grad_norm": 7.630702020833269e-05, + "learning_rate": 7.675030115243676e-07, + "loss": 0.0, + "step": 1880 + }, + { + "epoch": 1.8661728395061727, + "grad_norm": 8.353806333616376e-05, + "learning_rate": 6.650655326421646e-07, + "loss": 0.0, + "step": 1890 + }, + { + "epoch": 1.8760493827160494, + "grad_norm": 8.572315709898248e-05, + "learning_rate": 5.698707331890013e-07, + "loss": 0.0, + "step": 1900 + }, + { + "epoch": 1.885925925925926, + "grad_norm": 8.084969158517197e-05, + "learning_rate": 4.819469454957787e-07, + "loss": 0.0, + "step": 1910 + }, + { + "epoch": 1.8958024691358024, + "grad_norm": 0.17167682945728302, + "learning_rate": 4.013203378601449e-07, + "loss": 0.0028, + "step": 1920 + }, + { + "epoch": 1.905679012345679, + "grad_norm": 0.00012432083894964308, + "learning_rate": 3.2801490675817283e-07, + "loss": 0.0, + "step": 1930 + }, + { + "epoch": 1.9155555555555557, + "grad_norm": 8.875240746419877e-05, + "learning_rate": 2.6205246970239514e-07, + "loss": 0.0, + "step": 1940 + }, + { + "epoch": 1.925432098765432, + "grad_norm": 8.457344665657729e-05, + "learning_rate": 2.0345265874839593e-07, + "loss": 0.0, + "step": 1950 + }, + { + "epoch": 1.9353086419753085, + "grad_norm": 0.00028203128022141755, + "learning_rate": 1.522329146518009e-07, + "loss": 0.0, + "step": 1960 + }, + { + "epoch": 1.9451851851851854, + "grad_norm": 0.00013342279999051243, + "learning_rate": 1.0840848167749385e-07, + "loss": 0.0, + "step": 1970 + }, + { + "epoch": 1.9550617283950618, + "grad_norm": 8.94103359314613e-05, + "learning_rate": 7.199240306254296e-08, + "loss": 0.0, + "step": 1980 + }, + { + "epoch": 1.9649382716049382, + "grad_norm": 7.630888285348192e-05, + "learning_rate": 4.299551713420058e-08, + "loss": 0.0, + "step": 1990 + }, + { + "epoch": 1.9748148148148148, + "grad_norm": 7.469132106052712e-05, + "learning_rate": 2.1426454084153003e-08, + "loss": 0.0, + "step": 2000 + }, + { + "epoch": 1.9748148148148148, + "eval_nlpcc25_task1_dev_accuracy": 0.996525974025974, + "eval_nlpcc25_task1_dev_loss": 0.021287396550178528, + "eval_nlpcc25_task1_dev_runtime": 3451.6242, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 2000 + } + ], + "logging_steps": 10, + "max_steps": 2024, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6675425616031908e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/training_args.bin b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dca977cd6736713fb5f4fb68d38053f8e5f452eb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95434493472159610d4c2dd6dc378ebbfd2ed878eb7c793a26ac8ce723b446c0 +size 5752 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/README.md b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2b3b3f81cc13b36bc8025f54bbde88be77c46fd5 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/README.md @@ -0,0 +1,202 @@ +--- +base_model: /data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/adapter_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5914f072a289e8f8e163cee6ef6f15c727e47c85 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "q_proj", + "up_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/adapter_model.safetensors b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c970d7d886dd3a1b4a970acfebfbda006db52b22 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a750af00b88a99412be30c2e37965edd99d811ae8421a68761b0efe29760c252 +size 83945296 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/optimizer.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1274c88fe3b0e432451ab9ebb900dc04670cafc --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3efa4c86f06e9d31a9509c17556d36ec57b3c590a85aa72c2cd7ba5554d4beb1 +size 168149074 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/rng_state.pth b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..41dfa7d7903dea42d227bad638c2c750928d590c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2 +size 14244 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/scheduler.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f496ae6c56c3181baebeb39e62f27c91a297633c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44769ff3fbdbd262acd27ec8b6231bef093f2a8e23f7b59af144f48a6b76d650 +size 1064 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/special_tokens_map.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..df5c3a478b842fa66e6a8c10265478284c1d4f41 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/special_tokens_map.json @@ -0,0 +1,33 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/tokenizer.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/tokenizer_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60ca9fbea85debf5fb15193ad8ef61d682f121c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/trainer_state.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0c1b88d85e7afdcb0e6ac793d51452643193a5be --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/trainer_state.json @@ -0,0 +1,1484 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9985185185185186, + "eval_steps": 500, + "global_step": 2024, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009876543209876543, + "grad_norm": 8.173213958740234, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.3879, + "step": 10 + }, + { + "epoch": 0.019753086419753086, + "grad_norm": 7.614120960235596, + "learning_rate": 4.926108374384237e-06, + "loss": 1.3407, + "step": 20 + }, + { + "epoch": 0.02962962962962963, + "grad_norm": 6.4321064949035645, + "learning_rate": 7.3891625615763555e-06, + "loss": 1.1236, + "step": 30 + }, + { + "epoch": 0.03950617283950617, + "grad_norm": 2.8025548458099365, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5763, + "step": 40 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 0.7712829113006592, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.1632, + "step": 50 + }, + { + "epoch": 0.05925925925925926, + "grad_norm": 0.35205233097076416, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.0571, + "step": 60 + }, + { + "epoch": 0.0691358024691358, + "grad_norm": 0.9003845453262329, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.0374, + "step": 70 + }, + { + "epoch": 0.07901234567901234, + "grad_norm": 1.4704982042312622, + "learning_rate": 1.970443349753695e-05, + "loss": 0.0226, + "step": 80 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 2.574737310409546, + "learning_rate": 2.2167487684729066e-05, + "loss": 0.0129, + "step": 90 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 5.429731369018555, + "learning_rate": 2.4630541871921184e-05, + "loss": 0.0239, + "step": 100 + }, + { + "epoch": 0.10864197530864197, + "grad_norm": 0.17149314284324646, + "learning_rate": 2.70935960591133e-05, + "loss": 0.0102, + "step": 110 + }, + { + "epoch": 0.11851851851851852, + "grad_norm": 0.3575906753540039, + "learning_rate": 2.9556650246305422e-05, + "loss": 0.0089, + "step": 120 + }, + { + "epoch": 0.12839506172839507, + "grad_norm": 0.30634525418281555, + "learning_rate": 3.2019704433497536e-05, + "loss": 0.0028, + "step": 130 + }, + { + "epoch": 0.1382716049382716, + "grad_norm": 0.006271605845540762, + "learning_rate": 3.4482758620689657e-05, + "loss": 0.0006, + "step": 140 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.01288707833737135, + "learning_rate": 3.694581280788178e-05, + "loss": 0.0008, + "step": 150 + }, + { + "epoch": 0.1580246913580247, + "grad_norm": 0.004254512023180723, + "learning_rate": 3.94088669950739e-05, + "loss": 0.0008, + "step": 160 + }, + { + "epoch": 0.16790123456790124, + "grad_norm": 0.017511000856757164, + "learning_rate": 4.187192118226601e-05, + "loss": 0.0051, + "step": 170 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.001938911504112184, + "learning_rate": 4.433497536945813e-05, + "loss": 0.0002, + "step": 180 + }, + { + "epoch": 0.18765432098765433, + "grad_norm": 0.5761703848838806, + "learning_rate": 4.679802955665025e-05, + "loss": 0.0005, + "step": 190 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 0.0074734254740178585, + "learning_rate": 4.926108374384237e-05, + "loss": 0.0003, + "step": 200 + }, + { + "epoch": 0.2074074074074074, + "grad_norm": 0.0016212465707212687, + "learning_rate": 4.9998177025185267e-05, + "loss": 0.0011, + "step": 210 + }, + { + "epoch": 0.21728395061728395, + "grad_norm": 0.0033307652920484543, + "learning_rate": 4.998924880895427e-05, + "loss": 0.0009, + "step": 220 + }, + { + "epoch": 0.2271604938271605, + "grad_norm": 0.009896264411509037, + "learning_rate": 4.997288317313464e-05, + "loss": 0.001, + "step": 230 + }, + { + "epoch": 0.23703703703703705, + "grad_norm": 0.00731613440439105, + "learning_rate": 4.994908498854508e-05, + "loss": 0.0021, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 0.01958666928112507, + "learning_rate": 4.991786133811494e-05, + "loss": 0.0024, + "step": 250 + }, + { + "epoch": 0.25679012345679014, + "grad_norm": 0.008584077470004559, + "learning_rate": 4.9879221514776196e-05, + "loss": 0.0002, + "step": 260 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.0017416627379134297, + "learning_rate": 4.983317701869765e-05, + "loss": 0.0001, + "step": 270 + }, + { + "epoch": 0.2765432098765432, + "grad_norm": 0.8311202526092529, + "learning_rate": 4.977974155386214e-05, + "loss": 0.0014, + "step": 280 + }, + { + "epoch": 0.28641975308641976, + "grad_norm": 0.06173387169837952, + "learning_rate": 4.9718931023987926e-05, + "loss": 0.0043, + "step": 290 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.011783963069319725, + "learning_rate": 4.9650763527795385e-05, + "loss": 0.0032, + "step": 300 + }, + { + "epoch": 0.30617283950617286, + "grad_norm": 0.0035617423709481955, + "learning_rate": 4.9575259353620305e-05, + "loss": 0.0067, + "step": 310 + }, + { + "epoch": 0.3160493827160494, + "grad_norm": 0.007925956510007381, + "learning_rate": 4.949244097337567e-05, + "loss": 0.002, + "step": 320 + }, + { + "epoch": 0.32592592592592595, + "grad_norm": 0.00402588676661253, + "learning_rate": 4.9402333035863344e-05, + "loss": 0.0005, + "step": 330 + }, + { + "epoch": 0.3358024691358025, + "grad_norm": 0.00444524921476841, + "learning_rate": 4.930496235943811e-05, + "loss": 0.0001, + "step": 340 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 0.004133024252951145, + "learning_rate": 4.9200357924025755e-05, + "loss": 0.001, + "step": 350 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.0026975509244948626, + "learning_rate": 4.9088550862497966e-05, + "loss": 0.0042, + "step": 360 + }, + { + "epoch": 0.3654320987654321, + "grad_norm": 0.0009604657534509897, + "learning_rate": 4.8969574451406445e-05, + "loss": 0.0004, + "step": 370 + }, + { + "epoch": 0.37530864197530867, + "grad_norm": 0.0007526307599619031, + "learning_rate": 4.8843464101078924e-05, + "loss": 0.0005, + "step": 380 + }, + { + "epoch": 0.3851851851851852, + "grad_norm": 0.034833673387765884, + "learning_rate": 4.871025734508022e-05, + "loss": 0.0002, + "step": 390 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 0.0006839093985036016, + "learning_rate": 4.85699938290413e-05, + "loss": 0.0011, + "step": 400 + }, + { + "epoch": 0.4049382716049383, + "grad_norm": 0.17423595488071442, + "learning_rate": 4.842271529885978e-05, + "loss": 0.0002, + "step": 410 + }, + { + "epoch": 0.4148148148148148, + "grad_norm": 0.0021370204631239176, + "learning_rate": 4.8268465588275235e-05, + "loss": 0.0021, + "step": 420 + }, + { + "epoch": 0.4246913580246914, + "grad_norm": 0.002740511205047369, + "learning_rate": 4.8107290605823306e-05, + "loss": 0.0007, + "step": 430 + }, + { + "epoch": 0.4345679012345679, + "grad_norm": 0.5583088397979736, + "learning_rate": 4.79392383211721e-05, + "loss": 0.001, + "step": 440 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.0016133144963532686, + "learning_rate": 4.776435875084526e-05, + "loss": 0.0001, + "step": 450 + }, + { + "epoch": 0.454320987654321, + "grad_norm": 0.008442943915724754, + "learning_rate": 4.7582703943335785e-05, + "loss": 0.0001, + "step": 460 + }, + { + "epoch": 0.4641975308641975, + "grad_norm": 0.0007343398174270988, + "learning_rate": 4.739432796361515e-05, + "loss": 0.0003, + "step": 470 + }, + { + "epoch": 0.4740740740740741, + "grad_norm": 0.0006774268113076687, + "learning_rate": 4.719928687704218e-05, + "loss": 0.0, + "step": 480 + }, + { + "epoch": 0.4839506172839506, + "grad_norm": 0.0005065005389042199, + "learning_rate": 4.699763873267667e-05, + "loss": 0.0, + "step": 490 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 0.0007372256950475276, + "learning_rate": 4.678944354600249e-05, + "loss": 0.0002, + "step": 500 + }, + { + "epoch": 0.49382716049382713, + "eval_nlpcc25_task1_dev_accuracy": 0.9935714285714285, + "eval_nlpcc25_task1_dev_loss": 0.038043826818466187, + "eval_nlpcc25_task1_dev_runtime": 3452.1571, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 500 + }, + { + "epoch": 0.5037037037037037, + "grad_norm": 0.00031303250580094755, + "learning_rate": 4.65747632810655e-05, + "loss": 0.0002, + "step": 510 + }, + { + "epoch": 0.5135802469135803, + "grad_norm": 0.0012328416341915727, + "learning_rate": 4.635366183203157e-05, + "loss": 0.0101, + "step": 520 + }, + { + "epoch": 0.5234567901234568, + "grad_norm": 0.0003338649112265557, + "learning_rate": 4.612620500417001e-05, + "loss": 0.0, + "step": 530 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.00036818900844082236, + "learning_rate": 4.589246049426835e-05, + "loss": 0.0, + "step": 540 + }, + { + "epoch": 0.5432098765432098, + "grad_norm": 0.0008916526567190886, + "learning_rate": 4.565249787048408e-05, + "loss": 0.0005, + "step": 550 + }, + { + "epoch": 0.5530864197530864, + "grad_norm": 0.001147187897004187, + "learning_rate": 4.5406388551639436e-05, + "loss": 0.0, + "step": 560 + }, + { + "epoch": 0.562962962962963, + "grad_norm": 0.00036947213811799884, + "learning_rate": 4.515420578596542e-05, + "loss": 0.0032, + "step": 570 + }, + { + "epoch": 0.5728395061728395, + "grad_norm": 0.0018035719403997064, + "learning_rate": 4.489602462930126e-05, + "loss": 0.0, + "step": 580 + }, + { + "epoch": 0.582716049382716, + "grad_norm": 0.0003090534301009029, + "learning_rate": 4.4631921922755985e-05, + "loss": 0.0, + "step": 590 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.0003804001025855541, + "learning_rate": 4.436197626983855e-05, + "loss": 0.0, + "step": 600 + }, + { + "epoch": 0.6024691358024692, + "grad_norm": 0.0002678770397324115, + "learning_rate": 4.4086268013063556e-05, + "loss": 0.0, + "step": 610 + }, + { + "epoch": 0.6123456790123457, + "grad_norm": 0.0003251763992011547, + "learning_rate": 4.3804879210039275e-05, + "loss": 0.0, + "step": 620 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 0.00025686624576337636, + "learning_rate": 4.351789360904527e-05, + "loss": 0.0, + "step": 630 + }, + { + "epoch": 0.6320987654320988, + "grad_norm": 0.00028061174089089036, + "learning_rate": 4.322539662410687e-05, + "loss": 0.0, + "step": 640 + }, + { + "epoch": 0.6419753086419753, + "grad_norm": 0.0002689982939045876, + "learning_rate": 4.29274753095738e-05, + "loss": 0.0005, + "step": 650 + }, + { + "epoch": 0.6518518518518519, + "grad_norm": 0.0013502618530765176, + "learning_rate": 4.262421833421069e-05, + "loss": 0.0016, + "step": 660 + }, + { + "epoch": 0.6617283950617284, + "grad_norm": 0.0007630003965459764, + "learning_rate": 4.2315715954807e-05, + "loss": 0.0007, + "step": 670 + }, + { + "epoch": 0.671604938271605, + "grad_norm": 0.0010010383557528257, + "learning_rate": 4.200205998931442e-05, + "loss": 0.0, + "step": 680 + }, + { + "epoch": 0.6814814814814815, + "grad_norm": 0.0028828333597630262, + "learning_rate": 4.1683343789519544e-05, + "loss": 0.0, + "step": 690 + }, + { + "epoch": 0.691358024691358, + "grad_norm": 0.0005191878299228847, + "learning_rate": 4.135966221326007e-05, + "loss": 0.0002, + "step": 700 + }, + { + "epoch": 0.7012345679012346, + "grad_norm": 0.0010550885926932096, + "learning_rate": 4.103111159619274e-05, + "loss": 0.0, + "step": 710 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 0.00028645008569583297, + "learning_rate": 4.0697789723121485e-05, + "loss": 0.0024, + "step": 720 + }, + { + "epoch": 0.7209876543209877, + "grad_norm": 0.00031866817153058946, + "learning_rate": 4.035979579889424e-05, + "loss": 0.0, + "step": 730 + }, + { + "epoch": 0.7308641975308642, + "grad_norm": 0.0003414931707084179, + "learning_rate": 4.001723041887713e-05, + "loss": 0.0, + "step": 740 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.00028110420680604875, + "learning_rate": 3.967019553901477e-05, + "loss": 0.0, + "step": 750 + }, + { + "epoch": 0.7506172839506173, + "grad_norm": 0.0002813187020365149, + "learning_rate": 3.931879444548568e-05, + "loss": 0.0, + "step": 760 + }, + { + "epoch": 0.7604938271604939, + "grad_norm": 0.00022827104839961976, + "learning_rate": 3.8963131723961734e-05, + "loss": 0.0, + "step": 770 + }, + { + "epoch": 0.7703703703703704, + "grad_norm": 0.00034623872488737106, + "learning_rate": 3.860331322848091e-05, + "loss": 0.0, + "step": 780 + }, + { + "epoch": 0.7802469135802469, + "grad_norm": 0.000275197351584211, + "learning_rate": 3.823944604994243e-05, + "loss": 0.0, + "step": 790 + }, + { + "epoch": 0.7901234567901234, + "grad_norm": 0.0003900310257449746, + "learning_rate": 3.7871638484233966e-05, + "loss": 0.0016, + "step": 800 + }, + { + "epoch": 0.8, + "grad_norm": 0.005876209121197462, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0021, + "step": 810 + }, + { + "epoch": 0.8098765432098766, + "grad_norm": 0.10590516775846481, + "learning_rate": 3.71246412060613e-05, + "loss": 0.0004, + "step": 820 + }, + { + "epoch": 0.8197530864197531, + "grad_norm": 0.010082660242915154, + "learning_rate": 3.674567381849498e-05, + "loss": 0.0001, + "step": 830 + }, + { + "epoch": 0.8296296296296296, + "grad_norm": 0.0013996075140312314, + "learning_rate": 3.6363210627385004e-05, + "loss": 0.0001, + "step": 840 + }, + { + "epoch": 0.8395061728395061, + "grad_norm": 0.0020888964645564556, + "learning_rate": 3.59773654632531e-05, + "loss": 0.0001, + "step": 850 + }, + { + "epoch": 0.8493827160493828, + "grad_norm": 0.0007959024515002966, + "learning_rate": 3.558825316317998e-05, + "loss": 0.0, + "step": 860 + }, + { + "epoch": 0.8592592592592593, + "grad_norm": 0.0046511865220963955, + "learning_rate": 3.5195989536626925e-05, + "loss": 0.0, + "step": 870 + }, + { + "epoch": 0.8691358024691358, + "grad_norm": 0.3736809194087982, + "learning_rate": 3.4800691330968064e-05, + "loss": 0.0057, + "step": 880 + }, + { + "epoch": 0.8790123456790123, + "grad_norm": 0.003749604569748044, + "learning_rate": 3.440247619674347e-05, + "loss": 0.0, + "step": 890 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.0007721478468738496, + "learning_rate": 3.400146265264341e-05, + "loss": 0.0002, + "step": 900 + }, + { + "epoch": 0.8987654320987655, + "grad_norm": 0.000859771971590817, + "learning_rate": 3.359777005023428e-05, + "loss": 0.0, + "step": 910 + }, + { + "epoch": 0.908641975308642, + "grad_norm": 0.0005010567838326097, + "learning_rate": 3.3191518538436596e-05, + "loss": 0.0, + "step": 920 + }, + { + "epoch": 0.9185185185185185, + "grad_norm": 0.00021397744421847165, + "learning_rate": 3.278282902776569e-05, + "loss": 0.0, + "step": 930 + }, + { + "epoch": 0.928395061728395, + "grad_norm": 0.0004123291582800448, + "learning_rate": 3.237182315434582e-05, + "loss": 0.0001, + "step": 940 + }, + { + "epoch": 0.9382716049382716, + "grad_norm": 0.0003536223666742444, + "learning_rate": 3.195862324370812e-05, + "loss": 0.0001, + "step": 950 + }, + { + "epoch": 0.9481481481481482, + "grad_norm": 0.0007978660287335515, + "learning_rate": 3.154335227438362e-05, + "loss": 0.0, + "step": 960 + }, + { + "epoch": 0.9580246913580247, + "grad_norm": 0.0002476648660376668, + "learning_rate": 3.112613384130168e-05, + "loss": 0.0, + "step": 970 + }, + { + "epoch": 0.9679012345679012, + "grad_norm": 0.001542024314403534, + "learning_rate": 3.0707092119005155e-05, + "loss": 0.0, + "step": 980 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 0.0001304072793573141, + "learning_rate": 3.028635182469294e-05, + "loss": 0.0, + "step": 990 + }, + { + "epoch": 0.9876543209876543, + "grad_norm": 0.0019521948415786028, + "learning_rate": 2.9864038181101046e-05, + "loss": 0.0, + "step": 1000 + }, + { + "epoch": 0.9876543209876543, + "eval_nlpcc25_task1_dev_accuracy": 0.9957792207792208, + "eval_nlpcc25_task1_dev_loss": 0.025204554200172424, + "eval_nlpcc25_task1_dev_runtime": 3451.9671, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1000 + }, + { + "epoch": 0.9975308641975309, + "grad_norm": 0.0017585157183930278, + "learning_rate": 2.9440276879233197e-05, + "loss": 0.0, + "step": 1010 + }, + { + "epoch": 1.0069135802469136, + "grad_norm": 0.00013237106031738222, + "learning_rate": 2.9015194040952105e-05, + "loss": 0.0, + "step": 1020 + }, + { + "epoch": 1.0167901234567902, + "grad_norm": 0.0001692405203357339, + "learning_rate": 2.858891618144246e-05, + "loss": 0.0, + "step": 1030 + }, + { + "epoch": 1.0266666666666666, + "grad_norm": 0.00012385584705043584, + "learning_rate": 2.8161570171556867e-05, + "loss": 0.0, + "step": 1040 + }, + { + "epoch": 1.0365432098765432, + "grad_norm": 0.00046954487334005535, + "learning_rate": 2.7733283200055966e-05, + "loss": 0.0, + "step": 1050 + }, + { + "epoch": 1.0464197530864197, + "grad_norm": 0.000127663035527803, + "learning_rate": 2.7304182735753864e-05, + "loss": 0.0, + "step": 1060 + }, + { + "epoch": 1.0562962962962963, + "grad_norm": 0.00011920407268917188, + "learning_rate": 2.68743964895803e-05, + "loss": 0.0, + "step": 1070 + }, + { + "epoch": 1.066172839506173, + "grad_norm": 0.000622686231508851, + "learning_rate": 2.6444052376570677e-05, + "loss": 0.0, + "step": 1080 + }, + { + "epoch": 1.0760493827160493, + "grad_norm": 0.00012072878598701209, + "learning_rate": 2.60132784777954e-05, + "loss": 0.0, + "step": 1090 + }, + { + "epoch": 1.085925925925926, + "grad_norm": 0.00013644102727994323, + "learning_rate": 2.5582203002239757e-05, + "loss": 0.0, + "step": 1100 + }, + { + "epoch": 1.0958024691358024, + "grad_norm": 0.00014129135524854064, + "learning_rate": 2.515095424864577e-05, + "loss": 0.0, + "step": 1110 + }, + { + "epoch": 1.105679012345679, + "grad_norm": 0.00017698659212328494, + "learning_rate": 2.471966056732728e-05, + "loss": 0.0, + "step": 1120 + }, + { + "epoch": 1.1155555555555556, + "grad_norm": 0.00010272293002344668, + "learning_rate": 2.4288450321969752e-05, + "loss": 0.0, + "step": 1130 + }, + { + "epoch": 1.125432098765432, + "grad_norm": 0.00011754959268728271, + "learning_rate": 2.385745185142603e-05, + "loss": 0.0, + "step": 1140 + }, + { + "epoch": 1.1353086419753087, + "grad_norm": 0.00014159196871332824, + "learning_rate": 2.3426793431519524e-05, + "loss": 0.0, + "step": 1150 + }, + { + "epoch": 1.145185185185185, + "grad_norm": 0.00012516119750216603, + "learning_rate": 2.2996603236866168e-05, + "loss": 0.0, + "step": 1160 + }, + { + "epoch": 1.1550617283950617, + "grad_norm": 0.0001459266641177237, + "learning_rate": 2.2567009302726442e-05, + "loss": 0.0, + "step": 1170 + }, + { + "epoch": 1.1649382716049383, + "grad_norm": 0.00011655504204099998, + "learning_rate": 2.2138139486898916e-05, + "loss": 0.0, + "step": 1180 + }, + { + "epoch": 1.1748148148148148, + "grad_norm": 0.0714588537812233, + "learning_rate": 2.171012143166663e-05, + "loss": 0.0002, + "step": 1190 + }, + { + "epoch": 1.1846913580246914, + "grad_norm": 0.00010159167140955105, + "learning_rate": 2.1283082525807554e-05, + "loss": 0.0, + "step": 1200 + }, + { + "epoch": 1.194567901234568, + "grad_norm": 0.00014003751857671887, + "learning_rate": 2.0857149866680555e-05, + "loss": 0.0, + "step": 1210 + }, + { + "epoch": 1.2044444444444444, + "grad_norm": 0.00010035983723355457, + "learning_rate": 2.043245022239806e-05, + "loss": 0.0, + "step": 1220 + }, + { + "epoch": 1.214320987654321, + "grad_norm": 9.884718747343868e-05, + "learning_rate": 2.000910999409672e-05, + "loss": 0.0, + "step": 1230 + }, + { + "epoch": 1.2241975308641975, + "grad_norm": 9.416105604032055e-05, + "learning_rate": 1.9587255178317327e-05, + "loss": 0.0, + "step": 1240 + }, + { + "epoch": 1.234074074074074, + "grad_norm": 0.0001408099487889558, + "learning_rate": 1.9167011329505064e-05, + "loss": 0.0015, + "step": 1250 + }, + { + "epoch": 1.2439506172839505, + "grad_norm": 9.506545029580593e-05, + "learning_rate": 1.8748503522641487e-05, + "loss": 0.0, + "step": 1260 + }, + { + "epoch": 1.2538271604938271, + "grad_norm": 9.596488962415606e-05, + "learning_rate": 1.8331856316019024e-05, + "loss": 0.0, + "step": 1270 + }, + { + "epoch": 1.2637037037037038, + "grad_norm": 0.00010258956899633631, + "learning_rate": 1.791719371416936e-05, + "loss": 0.0, + "step": 1280 + }, + { + "epoch": 1.2735802469135802, + "grad_norm": 9.748171578394249e-05, + "learning_rate": 1.7504639130956652e-05, + "loss": 0.0, + "step": 1290 + }, + { + "epoch": 1.2834567901234568, + "grad_norm": 8.836873894324526e-05, + "learning_rate": 1.7094315352846473e-05, + "loss": 0.0, + "step": 1300 + }, + { + "epoch": 1.2933333333333334, + "grad_norm": 9.162294736597687e-05, + "learning_rate": 1.6686344502361516e-05, + "loss": 0.0, + "step": 1310 + }, + { + "epoch": 1.3032098765432099, + "grad_norm": 9.847845649346709e-05, + "learning_rate": 1.6280848001734943e-05, + "loss": 0.0, + "step": 1320 + }, + { + "epoch": 1.3130864197530865, + "grad_norm": 0.002509386744350195, + "learning_rate": 1.5877946536772065e-05, + "loss": 0.0, + "step": 1330 + }, + { + "epoch": 1.322962962962963, + "grad_norm": 9.157544263871387e-05, + "learning_rate": 1.5477760020931302e-05, + "loss": 0.0, + "step": 1340 + }, + { + "epoch": 1.3328395061728395, + "grad_norm": 0.00010282491712132469, + "learning_rate": 1.5080407559634929e-05, + "loss": 0.0, + "step": 1350 + }, + { + "epoch": 1.342716049382716, + "grad_norm": 0.13170938193798065, + "learning_rate": 1.468600741482038e-05, + "loss": 0.0001, + "step": 1360 + }, + { + "epoch": 1.3525925925925926, + "grad_norm": 9.943981422111392e-05, + "learning_rate": 1.4294676969742571e-05, + "loss": 0.0, + "step": 1370 + }, + { + "epoch": 1.3624691358024692, + "grad_norm": 8.701250771991909e-05, + "learning_rate": 1.390653269403771e-05, + "loss": 0.0, + "step": 1380 + }, + { + "epoch": 1.3723456790123456, + "grad_norm": 9.3141570687294e-05, + "learning_rate": 1.3521690109059062e-05, + "loss": 0.0017, + "step": 1390 + }, + { + "epoch": 1.3822222222222222, + "grad_norm": 0.00011883996921824291, + "learning_rate": 1.3140263753494903e-05, + "loss": 0.0, + "step": 1400 + }, + { + "epoch": 1.3920987654320989, + "grad_norm": 9.17040160857141e-05, + "learning_rate": 1.276236714927902e-05, + "loss": 0.0, + "step": 1410 + }, + { + "epoch": 1.4019753086419753, + "grad_norm": 0.00010277926048729569, + "learning_rate": 1.2388112767803729e-05, + "loss": 0.0, + "step": 1420 + }, + { + "epoch": 1.411851851851852, + "grad_norm": 8.92517709871754e-05, + "learning_rate": 1.2017611996445644e-05, + "loss": 0.0, + "step": 1430 + }, + { + "epoch": 1.4217283950617283, + "grad_norm": 8.916323713492602e-05, + "learning_rate": 1.1650975105413981e-05, + "loss": 0.0, + "step": 1440 + }, + { + "epoch": 1.431604938271605, + "grad_norm": 8.675019489601254e-05, + "learning_rate": 1.1288311214931446e-05, + "loss": 0.0, + "step": 1450 + }, + { + "epoch": 1.4414814814814814, + "grad_norm": 9.536254219710827e-05, + "learning_rate": 1.092972826275735e-05, + "loss": 0.0, + "step": 1460 + }, + { + "epoch": 1.451358024691358, + "grad_norm": 9.104371565626934e-05, + "learning_rate": 1.057533297206263e-05, + "loss": 0.0, + "step": 1470 + }, + { + "epoch": 1.4612345679012346, + "grad_norm": 9.839163249125704e-05, + "learning_rate": 1.0225230819666431e-05, + "loss": 0.0, + "step": 1480 + }, + { + "epoch": 1.471111111111111, + "grad_norm": 8.303455979330465e-05, + "learning_rate": 9.879526004643586e-06, + "loss": 0.0, + "step": 1490 + }, + { + "epoch": 1.4809876543209877, + "grad_norm": 9.03740365174599e-05, + "learning_rate": 9.538321417312351e-06, + "loss": 0.0, + "step": 1500 + }, + { + "epoch": 1.4809876543209877, + "eval_nlpcc25_task1_dev_accuracy": 0.9967857142857143, + "eval_nlpcc25_task1_dev_loss": 0.020024757832288742, + "eval_nlpcc25_task1_dev_runtime": 3451.5414, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1500 + }, + { + "epoch": 1.4908641975308643, + "grad_norm": 8.933599019655958e-05, + "learning_rate": 9.201718608611729e-06, + "loss": 0.0, + "step": 1510 + }, + { + "epoch": 1.5007407407407407, + "grad_norm": 0.00010863789066206664, + "learning_rate": 8.869817759877388e-06, + "loss": 0.0, + "step": 1520 + }, + { + "epoch": 1.5106172839506171, + "grad_norm": 9.006269829114899e-05, + "learning_rate": 8.542717653025211e-06, + "loss": 0.0, + "step": 1530 + }, + { + "epoch": 1.520493827160494, + "grad_norm": 9.235177276423201e-05, + "learning_rate": 8.220515641151359e-06, + "loss": 0.0, + "step": 1540 + }, + { + "epoch": 1.5303703703703704, + "grad_norm": 9.809488983592018e-05, + "learning_rate": 7.90330761955756e-06, + "loss": 0.0, + "step": 1550 + }, + { + "epoch": 1.5402469135802468, + "grad_norm": 8.182202145690098e-05, + "learning_rate": 7.591187997210305e-06, + "loss": 0.0, + "step": 1560 + }, + { + "epoch": 1.5501234567901234, + "grad_norm": 8.401457307627425e-05, + "learning_rate": 7.284249668642404e-06, + "loss": 0.0, + "step": 1570 + }, + { + "epoch": 1.56, + "grad_norm": 8.956337114796042e-05, + "learning_rate": 6.9825839863052554e-06, + "loss": 0.0, + "step": 1580 + }, + { + "epoch": 1.5698765432098765, + "grad_norm": 0.00010167937580263242, + "learning_rate": 6.686280733380107e-06, + "loss": 0.0, + "step": 1590 + }, + { + "epoch": 1.579753086419753, + "grad_norm": 7.88121105870232e-05, + "learning_rate": 6.395428097056349e-06, + "loss": 0.0, + "step": 1600 + }, + { + "epoch": 1.5896296296296297, + "grad_norm": 8.662124309921637e-05, + "learning_rate": 6.110112642284829e-06, + "loss": 0.0, + "step": 1610 + }, + { + "epoch": 1.5995061728395061, + "grad_norm": 8.408601570408791e-05, + "learning_rate": 5.830419286013969e-06, + "loss": 0.0, + "step": 1620 + }, + { + "epoch": 1.6093827160493828, + "grad_norm": 8.535439701518044e-05, + "learning_rate": 5.5564312719163875e-06, + "loss": 0.0, + "step": 1630 + }, + { + "epoch": 1.6192592592592594, + "grad_norm": 9.110941755352542e-05, + "learning_rate": 5.288230145613498e-06, + "loss": 0.0, + "step": 1640 + }, + { + "epoch": 1.6291358024691358, + "grad_norm": 9.438098641112447e-05, + "learning_rate": 5.025895730405566e-06, + "loss": 0.0, + "step": 1650 + }, + { + "epoch": 1.6390123456790122, + "grad_norm": 9.488565410720184e-05, + "learning_rate": 4.769506103514232e-06, + "loss": 0.0, + "step": 1660 + }, + { + "epoch": 1.6488888888888888, + "grad_norm": 9.606828825781122e-05, + "learning_rate": 4.51913757284487e-06, + "loss": 0.0, + "step": 1670 + }, + { + "epoch": 1.6587654320987655, + "grad_norm": 8.396390330744907e-05, + "learning_rate": 4.274864654275438e-06, + "loss": 0.0, + "step": 1680 + }, + { + "epoch": 1.668641975308642, + "grad_norm": 9.46772561292164e-05, + "learning_rate": 4.036760049478697e-06, + "loss": 0.0, + "step": 1690 + }, + { + "epoch": 1.6785185185185185, + "grad_norm": 7.928520062705502e-05, + "learning_rate": 3.8048946242843995e-06, + "loss": 0.0, + "step": 1700 + }, + { + "epoch": 1.6883950617283952, + "grad_norm": 8.762006473261863e-05, + "learning_rate": 3.5793373875878806e-06, + "loss": 0.0, + "step": 1710 + }, + { + "epoch": 1.6982716049382716, + "grad_norm": 8.157498814398423e-05, + "learning_rate": 3.3601554708112713e-06, + "loss": 0.0, + "step": 1720 + }, + { + "epoch": 1.7081481481481482, + "grad_norm": 7.945331162773073e-05, + "learning_rate": 3.14741410792353e-06, + "loss": 0.0, + "step": 1730 + }, + { + "epoch": 1.7180246913580248, + "grad_norm": 8.328018884640187e-05, + "learning_rate": 2.941176616025215e-06, + "loss": 0.0006, + "step": 1740 + }, + { + "epoch": 1.7279012345679012, + "grad_norm": 8.189202344510704e-05, + "learning_rate": 2.741504376503737e-06, + "loss": 0.0, + "step": 1750 + }, + { + "epoch": 1.7377777777777776, + "grad_norm": 7.249087502714247e-05, + "learning_rate": 2.5484568167647245e-06, + "loss": 0.0, + "step": 1760 + }, + { + "epoch": 1.7476543209876543, + "grad_norm": 7.910580461611971e-05, + "learning_rate": 2.362091392544985e-06, + "loss": 0.0, + "step": 1770 + }, + { + "epoch": 1.757530864197531, + "grad_norm": 7.526679837610573e-05, + "learning_rate": 2.1824635708122267e-06, + "loss": 0.0, + "step": 1780 + }, + { + "epoch": 1.7674074074074073, + "grad_norm": 9.244989632861689e-05, + "learning_rate": 2.0096268132567183e-06, + "loss": 0.0, + "step": 1790 + }, + { + "epoch": 1.777283950617284, + "grad_norm": 7.878772157710046e-05, + "learning_rate": 1.843632560379785e-06, + "loss": 0.0, + "step": 1800 + }, + { + "epoch": 1.7871604938271606, + "grad_norm": 8.294432336697355e-05, + "learning_rate": 1.684530216183805e-06, + "loss": 0.0002, + "step": 1810 + }, + { + "epoch": 1.797037037037037, + "grad_norm": 8.14785817055963e-05, + "learning_rate": 1.5323671334684042e-06, + "loss": 0.0, + "step": 1820 + }, + { + "epoch": 1.8069135802469136, + "grad_norm": 7.684365846216679e-05, + "learning_rate": 1.3871885997370464e-06, + "loss": 0.0, + "step": 1830 + }, + { + "epoch": 1.8167901234567903, + "grad_norm": 7.302551966859028e-05, + "learning_rate": 1.2490378237183658e-06, + "loss": 0.0, + "step": 1840 + }, + { + "epoch": 1.8266666666666667, + "grad_norm": 8.272424020105973e-05, + "learning_rate": 1.1179559225061809e-06, + "loss": 0.0, + "step": 1850 + }, + { + "epoch": 1.836543209876543, + "grad_norm": 8.503070421284065e-05, + "learning_rate": 9.93981909322031e-07, + "loss": 0.0, + "step": 1860 + }, + { + "epoch": 1.8464197530864197, + "grad_norm": 9.047168714459985e-05, + "learning_rate": 8.771526819038644e-07, + "loss": 0.0, + "step": 1870 + }, + { + "epoch": 1.8562962962962963, + "grad_norm": 7.630702020833269e-05, + "learning_rate": 7.675030115243676e-07, + "loss": 0.0, + "step": 1880 + }, + { + "epoch": 1.8661728395061727, + "grad_norm": 8.353806333616376e-05, + "learning_rate": 6.650655326421646e-07, + "loss": 0.0, + "step": 1890 + }, + { + "epoch": 1.8760493827160494, + "grad_norm": 8.572315709898248e-05, + "learning_rate": 5.698707331890013e-07, + "loss": 0.0, + "step": 1900 + }, + { + "epoch": 1.885925925925926, + "grad_norm": 8.084969158517197e-05, + "learning_rate": 4.819469454957787e-07, + "loss": 0.0, + "step": 1910 + }, + { + "epoch": 1.8958024691358024, + "grad_norm": 0.17167682945728302, + "learning_rate": 4.013203378601449e-07, + "loss": 0.0028, + "step": 1920 + }, + { + "epoch": 1.905679012345679, + "grad_norm": 0.00012432083894964308, + "learning_rate": 3.2801490675817283e-07, + "loss": 0.0, + "step": 1930 + }, + { + "epoch": 1.9155555555555557, + "grad_norm": 8.875240746419877e-05, + "learning_rate": 2.6205246970239514e-07, + "loss": 0.0, + "step": 1940 + }, + { + "epoch": 1.925432098765432, + "grad_norm": 8.457344665657729e-05, + "learning_rate": 2.0345265874839593e-07, + "loss": 0.0, + "step": 1950 + }, + { + "epoch": 1.9353086419753085, + "grad_norm": 0.00028203128022141755, + "learning_rate": 1.522329146518009e-07, + "loss": 0.0, + "step": 1960 + }, + { + "epoch": 1.9451851851851854, + "grad_norm": 0.00013342279999051243, + "learning_rate": 1.0840848167749385e-07, + "loss": 0.0, + "step": 1970 + }, + { + "epoch": 1.9550617283950618, + "grad_norm": 8.94103359314613e-05, + "learning_rate": 7.199240306254296e-08, + "loss": 0.0, + "step": 1980 + }, + { + "epoch": 1.9649382716049382, + "grad_norm": 7.630888285348192e-05, + "learning_rate": 4.299551713420058e-08, + "loss": 0.0, + "step": 1990 + }, + { + "epoch": 1.9748148148148148, + "grad_norm": 7.469132106052712e-05, + "learning_rate": 2.1426454084153003e-08, + "loss": 0.0, + "step": 2000 + }, + { + "epoch": 1.9748148148148148, + "eval_nlpcc25_task1_dev_accuracy": 0.996525974025974, + "eval_nlpcc25_task1_dev_loss": 0.021287396550178528, + "eval_nlpcc25_task1_dev_runtime": 3451.6242, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 2000 + }, + { + "epoch": 1.9846913580246914, + "grad_norm": 0.00013060342462267727, + "learning_rate": 7.2916333999722975e-09, + "loss": 0.0, + "step": 2010 + }, + { + "epoch": 1.9945679012345678, + "grad_norm": 7.544880645582452e-05, + "learning_rate": 5.952619545002147e-10, + "loss": 0.0, + "step": 2020 + } + ], + "logging_steps": 10, + "max_steps": 2024, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6877866202879754e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/training_args.bin b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dca977cd6736713fb5f4fb68d38053f8e5f452eb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-2024/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95434493472159610d4c2dd6dc378ebbfd2ed878eb7c793a26ac8ce723b446c0 +size 5752 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/README.md b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2b3b3f81cc13b36bc8025f54bbde88be77c46fd5 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/README.md @@ -0,0 +1,202 @@ +--- +base_model: /data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.0 \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/adapter_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5914f072a289e8f8e163cee6ef6f15c727e47c85 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/data/zhaoguoyu/Experiments/mgtd-sys/detector/ckpt/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "gate_proj", + "down_proj", + "q_proj", + "up_proj", + "v_proj", + "k_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/adapter_model.safetensors b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28e6a81c0a7eeb41a75d23f2922bda8c838b7537 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:131c06472b8e63fb021b2ea08e528d6250935c40ccc2f66f2ba2973ec2cd89a1 +size 83945296 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/optimizer.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d0dc92e9c2c47d45c200f59b8223011e8b6ed6 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a195af4266ec6165e910060d0c83cf63cebd9f8bdd608a452e59c41817687e +size 168149074 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/rng_state.pth b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33cefe6919222ddfa3c3946df69b8e5c5a17a0fc --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36 +size 14244 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/scheduler.pt b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9366b2665dde0c613ca4ea4fe22f7959fd8b6ba8 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad9a8469d07c56013ce9fa0e5ecbe85c4764ebbe5407f35d9ab0bd3875cae9f +size 1064 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/special_tokens_map.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..df5c3a478b842fa66e6a8c10265478284c1d4f41 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/special_tokens_map.json @@ -0,0 +1,33 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/tokenizer.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/tokenizer_config.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60ca9fbea85debf5fb15193ad8ef61d682f121c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/trainer_state.json b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..95474a907d8475b76d07cc50ccbe5bdb411b4842 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/trainer_state.json @@ -0,0 +1,393 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.49382716049382713, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009876543209876543, + "grad_norm": 8.173213958740234, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.3879, + "step": 10 + }, + { + "epoch": 0.019753086419753086, + "grad_norm": 7.614120960235596, + "learning_rate": 4.926108374384237e-06, + "loss": 1.3407, + "step": 20 + }, + { + "epoch": 0.02962962962962963, + "grad_norm": 6.4321064949035645, + "learning_rate": 7.3891625615763555e-06, + "loss": 1.1236, + "step": 30 + }, + { + "epoch": 0.03950617283950617, + "grad_norm": 2.8025548458099365, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5763, + "step": 40 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 0.7712829113006592, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.1632, + "step": 50 + }, + { + "epoch": 0.05925925925925926, + "grad_norm": 0.35205233097076416, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.0571, + "step": 60 + }, + { + "epoch": 0.0691358024691358, + "grad_norm": 0.9003845453262329, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.0374, + "step": 70 + }, + { + "epoch": 0.07901234567901234, + "grad_norm": 1.4704982042312622, + "learning_rate": 1.970443349753695e-05, + "loss": 0.0226, + "step": 80 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 2.574737310409546, + "learning_rate": 2.2167487684729066e-05, + "loss": 0.0129, + "step": 90 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 5.429731369018555, + "learning_rate": 2.4630541871921184e-05, + "loss": 0.0239, + "step": 100 + }, + { + "epoch": 0.10864197530864197, + "grad_norm": 0.17149314284324646, + "learning_rate": 2.70935960591133e-05, + "loss": 0.0102, + "step": 110 + }, + { + "epoch": 0.11851851851851852, + "grad_norm": 0.3575906753540039, + "learning_rate": 2.9556650246305422e-05, + "loss": 0.0089, + "step": 120 + }, + { + "epoch": 0.12839506172839507, + "grad_norm": 0.30634525418281555, + "learning_rate": 3.2019704433497536e-05, + "loss": 0.0028, + "step": 130 + }, + { + "epoch": 0.1382716049382716, + "grad_norm": 0.006271605845540762, + "learning_rate": 3.4482758620689657e-05, + "loss": 0.0006, + "step": 140 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.01288707833737135, + "learning_rate": 3.694581280788178e-05, + "loss": 0.0008, + "step": 150 + }, + { + "epoch": 0.1580246913580247, + "grad_norm": 0.004254512023180723, + "learning_rate": 3.94088669950739e-05, + "loss": 0.0008, + "step": 160 + }, + { + "epoch": 0.16790123456790124, + "grad_norm": 0.017511000856757164, + "learning_rate": 4.187192118226601e-05, + "loss": 0.0051, + "step": 170 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.001938911504112184, + "learning_rate": 4.433497536945813e-05, + "loss": 0.0002, + "step": 180 + }, + { + "epoch": 0.18765432098765433, + "grad_norm": 0.5761703848838806, + "learning_rate": 4.679802955665025e-05, + "loss": 0.0005, + "step": 190 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 0.0074734254740178585, + "learning_rate": 4.926108374384237e-05, + "loss": 0.0003, + "step": 200 + }, + { + "epoch": 0.2074074074074074, + "grad_norm": 0.0016212465707212687, + "learning_rate": 4.9998177025185267e-05, + "loss": 0.0011, + "step": 210 + }, + { + "epoch": 0.21728395061728395, + "grad_norm": 0.0033307652920484543, + "learning_rate": 4.998924880895427e-05, + "loss": 0.0009, + "step": 220 + }, + { + "epoch": 0.2271604938271605, + "grad_norm": 0.009896264411509037, + "learning_rate": 4.997288317313464e-05, + "loss": 0.001, + "step": 230 + }, + { + "epoch": 0.23703703703703705, + "grad_norm": 0.00731613440439105, + "learning_rate": 4.994908498854508e-05, + "loss": 0.0021, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 0.01958666928112507, + "learning_rate": 4.991786133811494e-05, + "loss": 0.0024, + "step": 250 + }, + { + "epoch": 0.25679012345679014, + "grad_norm": 0.008584077470004559, + "learning_rate": 4.9879221514776196e-05, + "loss": 0.0002, + "step": 260 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.0017416627379134297, + "learning_rate": 4.983317701869765e-05, + "loss": 0.0001, + "step": 270 + }, + { + "epoch": 0.2765432098765432, + "grad_norm": 0.8311202526092529, + "learning_rate": 4.977974155386214e-05, + "loss": 0.0014, + "step": 280 + }, + { + "epoch": 0.28641975308641976, + "grad_norm": 0.06173387169837952, + "learning_rate": 4.9718931023987926e-05, + "loss": 0.0043, + "step": 290 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.011783963069319725, + "learning_rate": 4.9650763527795385e-05, + "loss": 0.0032, + "step": 300 + }, + { + "epoch": 0.30617283950617286, + "grad_norm": 0.0035617423709481955, + "learning_rate": 4.9575259353620305e-05, + "loss": 0.0067, + "step": 310 + }, + { + "epoch": 0.3160493827160494, + "grad_norm": 0.007925956510007381, + "learning_rate": 4.949244097337567e-05, + "loss": 0.002, + "step": 320 + }, + { + "epoch": 0.32592592592592595, + "grad_norm": 0.00402588676661253, + "learning_rate": 4.9402333035863344e-05, + "loss": 0.0005, + "step": 330 + }, + { + "epoch": 0.3358024691358025, + "grad_norm": 0.00444524921476841, + "learning_rate": 4.930496235943811e-05, + "loss": 0.0001, + "step": 340 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 0.004133024252951145, + "learning_rate": 4.9200357924025755e-05, + "loss": 0.001, + "step": 350 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.0026975509244948626, + "learning_rate": 4.9088550862497966e-05, + "loss": 0.0042, + "step": 360 + }, + { + "epoch": 0.3654320987654321, + "grad_norm": 0.0009604657534509897, + "learning_rate": 4.8969574451406445e-05, + "loss": 0.0004, + "step": 370 + }, + { + "epoch": 0.37530864197530867, + "grad_norm": 0.0007526307599619031, + "learning_rate": 4.8843464101078924e-05, + "loss": 0.0005, + "step": 380 + }, + { + "epoch": 0.3851851851851852, + "grad_norm": 0.034833673387765884, + "learning_rate": 4.871025734508022e-05, + "loss": 0.0002, + "step": 390 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 0.0006839093985036016, + "learning_rate": 4.85699938290413e-05, + "loss": 0.0011, + "step": 400 + }, + { + "epoch": 0.4049382716049383, + "grad_norm": 0.17423595488071442, + "learning_rate": 4.842271529885978e-05, + "loss": 0.0002, + "step": 410 + }, + { + "epoch": 0.4148148148148148, + "grad_norm": 0.0021370204631239176, + "learning_rate": 4.8268465588275235e-05, + "loss": 0.0021, + "step": 420 + }, + { + "epoch": 0.4246913580246914, + "grad_norm": 0.002740511205047369, + "learning_rate": 4.8107290605823306e-05, + "loss": 0.0007, + "step": 430 + }, + { + "epoch": 0.4345679012345679, + "grad_norm": 0.5583088397979736, + "learning_rate": 4.79392383211721e-05, + "loss": 0.001, + "step": 440 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.0016133144963532686, + "learning_rate": 4.776435875084526e-05, + "loss": 0.0001, + "step": 450 + }, + { + "epoch": 0.454320987654321, + "grad_norm": 0.008442943915724754, + "learning_rate": 4.7582703943335785e-05, + "loss": 0.0001, + "step": 460 + }, + { + "epoch": 0.4641975308641975, + "grad_norm": 0.0007343398174270988, + "learning_rate": 4.739432796361515e-05, + "loss": 0.0003, + "step": 470 + }, + { + "epoch": 0.4740740740740741, + "grad_norm": 0.0006774268113076687, + "learning_rate": 4.719928687704218e-05, + "loss": 0.0, + "step": 480 + }, + { + "epoch": 0.4839506172839506, + "grad_norm": 0.0005065005389042199, + "learning_rate": 4.699763873267667e-05, + "loss": 0.0, + "step": 490 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 0.0007372256950475276, + "learning_rate": 4.678944354600249e-05, + "loss": 0.0002, + "step": 500 + }, + { + "epoch": 0.49382716049382713, + "eval_nlpcc25_task1_dev_accuracy": 0.9935714285714285, + "eval_nlpcc25_task1_dev_loss": 0.038043826818466187, + "eval_nlpcc25_task1_dev_runtime": 3452.1571, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 500 + } + ], + "logging_steps": 10, + "max_steps": 2024, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.158875259442299e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/training_args.bin b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dca977cd6736713fb5f4fb68d38053f8e5f452eb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95434493472159610d4c2dd6dc378ebbfd2ed878eb7c793a26ac8ce723b446c0 +size 5752 diff --git a/Llama-3.1-8B-Instruct-lora-2/eval_results.json b/Llama-3.1-8B-Instruct-lora-2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..063b8a631ae78cf84e292513e33a5ee74a7bfb41 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.9985185185185186, + "eval_nlpcc25_task1_dev_accuracy": 0.9964935064935064, + "eval_nlpcc25_task1_dev_loss": 0.021377403289079666, + "eval_nlpcc25_task1_dev_runtime": 3451.6717, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203 +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/special_tokens_map.json b/Llama-3.1-8B-Instruct-lora-2/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..df5c3a478b842fa66e6a8c10265478284c1d4f41 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/special_tokens_map.json @@ -0,0 +1,33 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/tokenizer.json b/Llama-3.1-8B-Instruct-lora-2/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/Llama-3.1-8B-Instruct-lora-2/tokenizer_config.json b/Llama-3.1-8B-Instruct-lora-2/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60ca9fbea85debf5fb15193ad8ef61d682f121c --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/tokenizer_config.json @@ -0,0 +1,2070 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/Llama-3.1-8B-Instruct-lora-2/train_results.json b/Llama-3.1-8B-Instruct-lora-2/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..121ff9d7fa875cd33312885d338353f54c0d4ea2 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.9985185185185186, + "total_flos": 1.6877866202879754e+18, + "train_loss": 0.023970503359357204, + "train_runtime": 210819.9785, + "train_samples_per_second": 0.307, + "train_steps_per_second": 0.01 +} \ No newline at end of file diff --git a/Llama-3.1-8B-Instruct-lora-2/trainer_log.jsonl b/Llama-3.1-8B-Instruct-lora-2/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b23f1e3cc2246f97801b886ecf365905993c8768 --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/trainer_log.jsonl @@ -0,0 +1,207 @@ +{"current_steps": 10, "total_steps": 2024, "loss": 1.3879, "lr": 2.4630541871921186e-06, "epoch": 0.009876543209876543, "percentage": 0.49, "elapsed_time": "0:15:54", "remaining_time": "2 days, 5:24:07"} +{"current_steps": 20, "total_steps": 2024, "loss": 1.3407, "lr": 4.926108374384237e-06, "epoch": 0.019753086419753086, "percentage": 0.99, "elapsed_time": "0:32:03", "remaining_time": "2 days, 5:31:45"} +{"current_steps": 30, "total_steps": 2024, "loss": 1.1236, "lr": 7.3891625615763555e-06, "epoch": 0.02962962962962963, "percentage": 1.48, "elapsed_time": "0:47:51", "remaining_time": "2 days, 5:00:32"} +{"current_steps": 40, "total_steps": 2024, "loss": 0.5763, "lr": 9.852216748768475e-06, "epoch": 0.03950617283950617, "percentage": 1.98, "elapsed_time": "1:04:16", "remaining_time": "2 days, 5:07:57"} +{"current_steps": 50, "total_steps": 2024, "loss": 0.1632, "lr": 1.2315270935960592e-05, "epoch": 0.04938271604938271, "percentage": 2.47, "elapsed_time": "1:20:48", "remaining_time": "2 days, 5:09:59"} +{"current_steps": 60, "total_steps": 2024, "loss": 0.0571, "lr": 1.4778325123152711e-05, "epoch": 0.05925925925925926, "percentage": 2.96, "elapsed_time": "1:37:14", "remaining_time": "2 days, 5:03:17"} +{"current_steps": 70, "total_steps": 2024, "loss": 0.0374, "lr": 1.7241379310344828e-05, "epoch": 0.0691358024691358, "percentage": 3.46, "elapsed_time": "1:53:03", "remaining_time": "2 days, 4:35:50"} +{"current_steps": 80, "total_steps": 2024, "loss": 0.0226, "lr": 1.970443349753695e-05, "epoch": 0.07901234567901234, "percentage": 3.95, "elapsed_time": "2:08:52", "remaining_time": "2 days, 4:11:48"} +{"current_steps": 90, "total_steps": 2024, "loss": 0.0129, "lr": 2.2167487684729066e-05, "epoch": 0.08888888888888889, "percentage": 4.45, "elapsed_time": "2:25:11", "remaining_time": "2 days, 4:00:08"} +{"current_steps": 100, "total_steps": 2024, "loss": 0.0239, "lr": 2.4630541871921184e-05, "epoch": 0.09876543209876543, "percentage": 4.94, "elapsed_time": "2:41:25", "remaining_time": "2 days, 3:45:58"} +{"current_steps": 110, "total_steps": 2024, "loss": 0.0102, "lr": 2.70935960591133e-05, "epoch": 0.10864197530864197, "percentage": 5.43, "elapsed_time": "2:57:19", "remaining_time": "2 days, 3:25:29"} +{"current_steps": 120, "total_steps": 2024, "loss": 0.0089, "lr": 2.9556650246305422e-05, "epoch": 0.11851851851851852, "percentage": 5.93, "elapsed_time": "3:13:34", "remaining_time": "2 days, 3:11:19"} +{"current_steps": 130, "total_steps": 2024, "loss": 0.0028, "lr": 3.2019704433497536e-05, "epoch": 0.12839506172839507, "percentage": 6.42, "elapsed_time": "3:29:53", "remaining_time": "2 days, 2:57:50"} +{"current_steps": 140, "total_steps": 2024, "loss": 0.0006, "lr": 3.4482758620689657e-05, "epoch": 0.1382716049382716, "percentage": 6.92, "elapsed_time": "3:46:07", "remaining_time": "2 days, 2:42:56"} +{"current_steps": 150, "total_steps": 2024, "loss": 0.0008, "lr": 3.694581280788178e-05, "epoch": 0.14814814814814814, "percentage": 7.41, "elapsed_time": "4:01:55", "remaining_time": "2 days, 2:22:21"} +{"current_steps": 160, "total_steps": 2024, "loss": 0.0008, "lr": 3.94088669950739e-05, "epoch": 0.1580246913580247, "percentage": 7.91, "elapsed_time": "4:18:21", "remaining_time": "2 days, 2:09:53"} +{"current_steps": 170, "total_steps": 2024, "loss": 0.0051, "lr": 4.187192118226601e-05, "epoch": 0.16790123456790124, "percentage": 8.4, "elapsed_time": "4:34:21", "remaining_time": "2 days, 1:52:03"} +{"current_steps": 180, "total_steps": 2024, "loss": 0.0002, "lr": 4.433497536945813e-05, "epoch": 0.17777777777777778, "percentage": 8.89, "elapsed_time": "4:50:33", "remaining_time": "2 days, 1:36:34"} +{"current_steps": 190, "total_steps": 2024, "loss": 0.0005, "lr": 4.679802955665025e-05, "epoch": 0.18765432098765433, "percentage": 9.39, "elapsed_time": "5:06:46", "remaining_time": "2 days, 1:21:10"} +{"current_steps": 200, "total_steps": 2024, "loss": 0.0003, "lr": 4.926108374384237e-05, "epoch": 0.19753086419753085, "percentage": 9.88, "elapsed_time": "5:22:45", "remaining_time": "2 days, 1:03:30"} +{"current_steps": 210, "total_steps": 2024, "loss": 0.0011, "lr": 4.9998177025185267e-05, "epoch": 0.2074074074074074, "percentage": 10.38, "elapsed_time": "5:38:36", "remaining_time": "2 days, 0:44:52"} +{"current_steps": 220, "total_steps": 2024, "loss": 0.0009, "lr": 4.998924880895427e-05, "epoch": 0.21728395061728395, "percentage": 10.87, "elapsed_time": "5:55:06", "remaining_time": "2 days, 0:31:49"} +{"current_steps": 230, "total_steps": 2024, "loss": 0.001, "lr": 4.997288317313464e-05, "epoch": 0.2271604938271605, "percentage": 11.36, "elapsed_time": "6:11:12", "remaining_time": "2 days, 0:15:27"} +{"current_steps": 240, "total_steps": 2024, "loss": 0.0021, "lr": 4.994908498854508e-05, "epoch": 0.23703703703703705, "percentage": 11.86, "elapsed_time": "6:27:28", "remaining_time": "2 days, 0:00:15"} +{"current_steps": 250, "total_steps": 2024, "loss": 0.0024, "lr": 4.991786133811494e-05, "epoch": 0.24691358024691357, "percentage": 12.35, "elapsed_time": "6:43:48", "remaining_time": "1 day, 23:45:23"} +{"current_steps": 260, "total_steps": 2024, "loss": 0.0002, "lr": 4.9879221514776196e-05, "epoch": 0.25679012345679014, "percentage": 12.85, "elapsed_time": "7:00:11", "remaining_time": "1 day, 23:30:51"} +{"current_steps": 270, "total_steps": 2024, "loss": 0.0001, "lr": 4.983317701869765e-05, "epoch": 0.26666666666666666, "percentage": 13.34, "elapsed_time": "7:16:00", "remaining_time": "1 day, 23:12:29"} +{"current_steps": 280, "total_steps": 2024, "loss": 0.0014, "lr": 4.977974155386214e-05, "epoch": 0.2765432098765432, "percentage": 13.83, "elapsed_time": "7:31:42", "remaining_time": "1 day, 22:53:27"} +{"current_steps": 290, "total_steps": 2024, "loss": 0.0043, "lr": 4.9718931023987926e-05, "epoch": 0.28641975308641976, "percentage": 14.33, "elapsed_time": "7:47:30", "remaining_time": "1 day, 22:35:22"} +{"current_steps": 300, "total_steps": 2024, "loss": 0.0032, "lr": 4.9650763527795385e-05, "epoch": 0.2962962962962963, "percentage": 14.82, "elapsed_time": "8:03:54", "remaining_time": "1 day, 22:20:53"} +{"current_steps": 310, "total_steps": 2024, "loss": 0.0067, "lr": 4.9575259353620305e-05, "epoch": 0.30617283950617286, "percentage": 15.32, "elapsed_time": "8:20:13", "remaining_time": "1 day, 22:05:46"} +{"current_steps": 320, "total_steps": 2024, "loss": 0.002, "lr": 4.949244097337567e-05, "epoch": 0.3160493827160494, "percentage": 15.81, "elapsed_time": "8:36:22", "remaining_time": "1 day, 21:49:39"} +{"current_steps": 330, "total_steps": 2024, "loss": 0.0005, "lr": 4.9402333035863344e-05, "epoch": 0.32592592592592595, "percentage": 16.3, "elapsed_time": "8:52:39", "remaining_time": "1 day, 21:34:20"} +{"current_steps": 340, "total_steps": 2024, "loss": 0.0001, "lr": 4.930496235943811e-05, "epoch": 0.3358024691358025, "percentage": 16.8, "elapsed_time": "9:08:38", "remaining_time": "1 day, 21:17:21"} +{"current_steps": 350, "total_steps": 2024, "loss": 0.001, "lr": 4.9200357924025755e-05, "epoch": 0.345679012345679, "percentage": 17.29, "elapsed_time": "9:24:57", "remaining_time": "1 day, 21:02:05"} +{"current_steps": 360, "total_steps": 2024, "loss": 0.0042, "lr": 4.9088550862497966e-05, "epoch": 0.35555555555555557, "percentage": 17.79, "elapsed_time": "9:41:36", "remaining_time": "1 day, 20:48:19"} +{"current_steps": 370, "total_steps": 2024, "loss": 0.0004, "lr": 4.8969574451406445e-05, "epoch": 0.3654320987654321, "percentage": 18.28, "elapsed_time": "9:57:51", "remaining_time": "1 day, 20:32:33"} +{"current_steps": 380, "total_steps": 2024, "loss": 0.0005, "lr": 4.8843464101078924e-05, "epoch": 0.37530864197530867, "percentage": 18.77, "elapsed_time": "10:13:51", "remaining_time": "1 day, 20:15:44"} +{"current_steps": 390, "total_steps": 2024, "loss": 0.0002, "lr": 4.871025734508022e-05, "epoch": 0.3851851851851852, "percentage": 19.27, "elapsed_time": "10:29:33", "remaining_time": "1 day, 19:57:42"} +{"current_steps": 400, "total_steps": 2024, "loss": 0.0011, "lr": 4.85699938290413e-05, "epoch": 0.3950617283950617, "percentage": 19.76, "elapsed_time": "10:45:35", "remaining_time": "1 day, 19:41:06"} +{"current_steps": 410, "total_steps": 2024, "loss": 0.0002, "lr": 4.842271529885978e-05, "epoch": 0.4049382716049383, "percentage": 20.26, "elapsed_time": "11:01:58", "remaining_time": "1 day, 19:25:55"} +{"current_steps": 420, "total_steps": 2024, "loss": 0.0021, "lr": 4.8268465588275235e-05, "epoch": 0.4148148148148148, "percentage": 20.75, "elapsed_time": "11:17:53", "remaining_time": "1 day, 19:08:52"} +{"current_steps": 430, "total_steps": 2024, "loss": 0.0007, "lr": 4.8107290605823306e-05, "epoch": 0.4246913580246914, "percentage": 21.25, "elapsed_time": "11:33:59", "remaining_time": "1 day, 18:52:34"} +{"current_steps": 440, "total_steps": 2024, "loss": 0.001, "lr": 4.79392383211721e-05, "epoch": 0.4345679012345679, "percentage": 21.74, "elapsed_time": "11:50:12", "remaining_time": "1 day, 18:36:46"} +{"current_steps": 450, "total_steps": 2024, "loss": 0.0001, "lr": 4.776435875084526e-05, "epoch": 0.4444444444444444, "percentage": 22.23, "elapsed_time": "12:06:58", "remaining_time": "1 day, 18:22:48"} +{"current_steps": 460, "total_steps": 2024, "loss": 0.0001, "lr": 4.7582703943335785e-05, "epoch": 0.454320987654321, "percentage": 22.73, "elapsed_time": "12:23:04", "remaining_time": "1 day, 18:06:28"} +{"current_steps": 470, "total_steps": 2024, "loss": 0.0003, "lr": 4.739432796361515e-05, "epoch": 0.4641975308641975, "percentage": 23.22, "elapsed_time": "12:40:08", "remaining_time": "1 day, 17:53:20"} +{"current_steps": 480, "total_steps": 2024, "loss": 0.0, "lr": 4.719928687704218e-05, "epoch": 0.4740740740740741, "percentage": 23.72, "elapsed_time": "12:56:36", "remaining_time": "1 day, 17:38:04"} +{"current_steps": 490, "total_steps": 2024, "loss": 0.0, "lr": 4.699763873267667e-05, "epoch": 0.4839506172839506, "percentage": 24.21, "elapsed_time": "13:12:35", "remaining_time": "1 day, 17:21:18"} +{"current_steps": 500, "total_steps": 2024, "loss": 0.0002, "lr": 4.678944354600249e-05, "epoch": 0.49382716049382713, "percentage": 24.7, "elapsed_time": "13:29:20", "remaining_time": "1 day, 17:06:51"} +{"current_steps": 500, "total_steps": 2024, "epoch": 0.49382716049382713, "percentage": 24.7, "elapsed_time": "14:26:52", "remaining_time": "1 day, 20:02:13"} +{"current_steps": 510, "total_steps": 2024, "loss": 0.0002, "lr": 4.65747632810655e-05, "epoch": 0.5037037037037037, "percentage": 25.2, "elapsed_time": "14:42:44", "remaining_time": "1 day, 19:40:32"} +{"current_steps": 520, "total_steps": 2024, "loss": 0.0101, "lr": 4.635366183203157e-05, "epoch": 0.5135802469135803, "percentage": 25.69, "elapsed_time": "14:58:48", "remaining_time": "1 day, 19:19:37"} +{"current_steps": 530, "total_steps": 2024, "loss": 0.0, "lr": 4.612620500417001e-05, "epoch": 0.5234567901234568, "percentage": 26.19, "elapsed_time": "15:14:55", "remaining_time": "1 day, 18:59:03"} +{"current_steps": 540, "total_steps": 2024, "loss": 0.0, "lr": 4.589246049426835e-05, "epoch": 0.5333333333333333, "percentage": 26.68, "elapsed_time": "15:31:43", "remaining_time": "1 day, 18:40:32"} +{"current_steps": 550, "total_steps": 2024, "loss": 0.0005, "lr": 4.565249787048408e-05, "epoch": 0.5432098765432098, "percentage": 27.17, "elapsed_time": "15:48:04", "remaining_time": "1 day, 18:20:50"} +{"current_steps": 560, "total_steps": 2024, "loss": 0.0, "lr": 4.5406388551639436e-05, "epoch": 0.5530864197530864, "percentage": 27.67, "elapsed_time": "16:04:45", "remaining_time": "1 day, 18:02:10"} +{"current_steps": 570, "total_steps": 2024, "loss": 0.0032, "lr": 4.515420578596542e-05, "epoch": 0.562962962962963, "percentage": 28.16, "elapsed_time": "16:21:06", "remaining_time": "1 day, 17:42:40"} +{"current_steps": 580, "total_steps": 2024, "loss": 0.0, "lr": 4.489602462930126e-05, "epoch": 0.5728395061728395, "percentage": 28.66, "elapsed_time": "16:37:20", "remaining_time": "1 day, 17:23:02"} +{"current_steps": 590, "total_steps": 2024, "loss": 0.0, "lr": 4.4631921922755985e-05, "epoch": 0.582716049382716, "percentage": 29.15, "elapsed_time": "16:53:19", "remaining_time": "1 day, 17:02:53"} +{"current_steps": 600, "total_steps": 2024, "loss": 0.0, "lr": 4.436197626983855e-05, "epoch": 0.5925925925925926, "percentage": 29.64, "elapsed_time": "17:09:54", "remaining_time": "1 day, 16:44:19"} +{"current_steps": 610, "total_steps": 2024, "loss": 0.0, "lr": 4.4086268013063556e-05, "epoch": 0.6024691358024692, "percentage": 30.14, "elapsed_time": "17:26:16", "remaining_time": "1 day, 16:25:17"} +{"current_steps": 620, "total_steps": 2024, "loss": 0.0, "lr": 4.3804879210039275e-05, "epoch": 0.6123456790123457, "percentage": 30.63, "elapsed_time": "17:42:36", "remaining_time": "1 day, 16:06:17"} +{"current_steps": 630, "total_steps": 2024, "loss": 0.0, "lr": 4.351789360904527e-05, "epoch": 0.6222222222222222, "percentage": 31.13, "elapsed_time": "17:58:59", "remaining_time": "1 day, 15:47:27"} +{"current_steps": 640, "total_steps": 2024, "loss": 0.0, "lr": 4.322539662410687e-05, "epoch": 0.6320987654320988, "percentage": 31.62, "elapsed_time": "18:14:56", "remaining_time": "1 day, 15:27:47"} +{"current_steps": 650, "total_steps": 2024, "loss": 0.0005, "lr": 4.29274753095738e-05, "epoch": 0.6419753086419753, "percentage": 32.11, "elapsed_time": "18:31:34", "remaining_time": "1 day, 15:09:41"} +{"current_steps": 660, "total_steps": 2024, "loss": 0.0016, "lr": 4.262421833421069e-05, "epoch": 0.6518518518518519, "percentage": 32.61, "elapsed_time": "18:47:41", "remaining_time": "1 day, 14:50:33"} +{"current_steps": 670, "total_steps": 2024, "loss": 0.0007, "lr": 4.2315715954807e-05, "epoch": 0.6617283950617284, "percentage": 33.1, "elapsed_time": "19:04:06", "remaining_time": "1 day, 14:32:07"} +{"current_steps": 680, "total_steps": 2024, "loss": 0.0, "lr": 4.200205998931442e-05, "epoch": 0.671604938271605, "percentage": 33.6, "elapsed_time": "19:20:50", "remaining_time": "1 day, 14:14:21"} +{"current_steps": 690, "total_steps": 2024, "loss": 0.0, "lr": 4.1683343789519544e-05, "epoch": 0.6814814814814815, "percentage": 34.09, "elapsed_time": "19:36:53", "remaining_time": "1 day, 13:55:19"} +{"current_steps": 700, "total_steps": 2024, "loss": 0.0002, "lr": 4.135966221326007e-05, "epoch": 0.691358024691358, "percentage": 34.58, "elapsed_time": "19:53:11", "remaining_time": "1 day, 13:36:49"} +{"current_steps": 710, "total_steps": 2024, "loss": 0.0, "lr": 4.103111159619274e-05, "epoch": 0.7012345679012346, "percentage": 35.08, "elapsed_time": "20:09:29", "remaining_time": "1 day, 13:18:24"} +{"current_steps": 720, "total_steps": 2024, "loss": 0.0024, "lr": 4.0697789723121485e-05, "epoch": 0.7111111111111111, "percentage": 35.57, "elapsed_time": "20:25:59", "remaining_time": "1 day, 13:00:24"} +{"current_steps": 730, "total_steps": 2024, "loss": 0.0, "lr": 4.035979579889424e-05, "epoch": 0.7209876543209877, "percentage": 36.07, "elapsed_time": "20:41:53", "remaining_time": "1 day, 12:41:22"} +{"current_steps": 740, "total_steps": 2024, "loss": 0.0, "lr": 4.001723041887713e-05, "epoch": 0.7308641975308642, "percentage": 36.56, "elapsed_time": "20:57:58", "remaining_time": "1 day, 12:22:46"} +{"current_steps": 750, "total_steps": 2024, "loss": 0.0, "lr": 3.967019553901477e-05, "epoch": 0.7407407407407407, "percentage": 37.06, "elapsed_time": "21:14:13", "remaining_time": "1 day, 12:04:29"} +{"current_steps": 760, "total_steps": 2024, "loss": 0.0, "lr": 3.931879444548568e-05, "epoch": 0.7506172839506173, "percentage": 37.55, "elapsed_time": "21:30:00", "remaining_time": "1 day, 11:45:29"} +{"current_steps": 770, "total_steps": 2024, "loss": 0.0, "lr": 3.8963131723961734e-05, "epoch": 0.7604938271604939, "percentage": 38.04, "elapsed_time": "21:46:33", "remaining_time": "1 day, 11:27:50"} +{"current_steps": 780, "total_steps": 2024, "loss": 0.0, "lr": 3.860331322848091e-05, "epoch": 0.7703703703703704, "percentage": 38.54, "elapsed_time": "22:03:12", "remaining_time": "1 day, 11:10:20"} +{"current_steps": 790, "total_steps": 2024, "loss": 0.0, "lr": 3.823944604994243e-05, "epoch": 0.7802469135802469, "percentage": 39.03, "elapsed_time": "22:19:29", "remaining_time": "1 day, 10:52:19"} +{"current_steps": 800, "total_steps": 2024, "loss": 0.0016, "lr": 3.7871638484233966e-05, "epoch": 0.7901234567901234, "percentage": 39.53, "elapsed_time": "22:35:19", "remaining_time": "1 day, 10:33:39"} +{"current_steps": 810, "total_steps": 2024, "loss": 0.0021, "lr": 3.7500000000000003e-05, "epoch": 0.8, "percentage": 40.02, "elapsed_time": "22:51:10", "remaining_time": "1 day, 10:15:03"} +{"current_steps": 820, "total_steps": 2024, "loss": 0.0004, "lr": 3.71246412060613e-05, "epoch": 0.8098765432098766, "percentage": 40.51, "elapsed_time": "23:07:28", "remaining_time": "1 day, 9:57:13"} +{"current_steps": 830, "total_steps": 2024, "loss": 0.0001, "lr": 3.674567381849498e-05, "epoch": 0.8197530864197531, "percentage": 41.01, "elapsed_time": "23:23:49", "remaining_time": "1 day, 9:39:28"} +{"current_steps": 840, "total_steps": 2024, "loss": 0.0001, "lr": 3.6363210627385004e-05, "epoch": 0.8296296296296296, "percentage": 41.5, "elapsed_time": "23:40:16", "remaining_time": "1 day, 9:21:55"} +{"current_steps": 850, "total_steps": 2024, "loss": 0.0001, "lr": 3.59773654632531e-05, "epoch": 0.8395061728395061, "percentage": 42.0, "elapsed_time": "23:57:01", "remaining_time": "1 day, 9:04:47"} +{"current_steps": 860, "total_steps": 2024, "loss": 0.0, "lr": 3.558825316317998e-05, "epoch": 0.8493827160493828, "percentage": 42.49, "elapsed_time": "1 day, 0:13:18", "remaining_time": "1 day, 8:47:01"} +{"current_steps": 870, "total_steps": 2024, "loss": 0.0, "lr": 3.5195989536626925e-05, "epoch": 0.8592592592592593, "percentage": 42.98, "elapsed_time": "1 day, 0:28:44", "remaining_time": "1 day, 8:28:12"} +{"current_steps": 880, "total_steps": 2024, "loss": 0.0057, "lr": 3.4800691330968064e-05, "epoch": 0.8691358024691358, "percentage": 43.48, "elapsed_time": "1 day, 0:45:08", "remaining_time": "1 day, 8:10:40"} +{"current_steps": 890, "total_steps": 2024, "loss": 0.0, "lr": 3.440247619674347e-05, "epoch": 0.8790123456790123, "percentage": 43.97, "elapsed_time": "1 day, 1:01:39", "remaining_time": "1 day, 7:53:21"} +{"current_steps": 900, "total_steps": 2024, "loss": 0.0002, "lr": 3.400146265264341e-05, "epoch": 0.8888888888888888, "percentage": 44.47, "elapsed_time": "1 day, 1:17:25", "remaining_time": "1 day, 7:35:05"} +{"current_steps": 910, "total_steps": 2024, "loss": 0.0, "lr": 3.359777005023428e-05, "epoch": 0.8987654320987655, "percentage": 44.96, "elapsed_time": "1 day, 1:34:09", "remaining_time": "1 day, 7:18:05"} +{"current_steps": 920, "total_steps": 2024, "loss": 0.0, "lr": 3.3191518538436596e-05, "epoch": 0.908641975308642, "percentage": 45.45, "elapsed_time": "1 day, 1:50:37", "remaining_time": "1 day, 7:00:44"} +{"current_steps": 930, "total_steps": 2024, "loss": 0.0, "lr": 3.278282902776569e-05, "epoch": 0.9185185185185185, "percentage": 45.95, "elapsed_time": "1 day, 2:07:17", "remaining_time": "1 day, 6:43:40"} +{"current_steps": 940, "total_steps": 2024, "loss": 0.0001, "lr": 3.237182315434582e-05, "epoch": 0.928395061728395, "percentage": 46.44, "elapsed_time": "1 day, 2:23:27", "remaining_time": "1 day, 6:26:01"} +{"current_steps": 950, "total_steps": 2024, "loss": 0.0001, "lr": 3.195862324370812e-05, "epoch": 0.9382716049382716, "percentage": 46.94, "elapsed_time": "1 day, 2:39:30", "remaining_time": "1 day, 6:08:17"} +{"current_steps": 960, "total_steps": 2024, "loss": 0.0, "lr": 3.154335227438362e-05, "epoch": 0.9481481481481482, "percentage": 47.43, "elapsed_time": "1 day, 2:56:04", "remaining_time": "1 day, 5:51:08"} +{"current_steps": 970, "total_steps": 2024, "loss": 0.0, "lr": 3.112613384130168e-05, "epoch": 0.9580246913580247, "percentage": 47.92, "elapsed_time": "1 day, 3:12:15", "remaining_time": "1 day, 5:33:36"} +{"current_steps": 980, "total_steps": 2024, "loss": 0.0, "lr": 3.0707092119005155e-05, "epoch": 0.9679012345679012, "percentage": 48.42, "elapsed_time": "1 day, 3:28:37", "remaining_time": "1 day, 5:16:17"} +{"current_steps": 990, "total_steps": 2024, "loss": 0.0, "lr": 3.028635182469294e-05, "epoch": 0.9777777777777777, "percentage": 48.91, "elapsed_time": "1 day, 3:45:08", "remaining_time": "1 day, 4:59:09"} +{"current_steps": 1000, "total_steps": 2024, "loss": 0.0, "lr": 2.9864038181101046e-05, "epoch": 0.9876543209876543, "percentage": 49.41, "elapsed_time": "1 day, 4:01:31", "remaining_time": "1 day, 4:41:52"} +{"current_steps": 1000, "total_steps": 2024, "epoch": 0.9876543209876543, "percentage": 49.41, "elapsed_time": "1 day, 4:59:03", "remaining_time": "1 day, 5:40:47"} +{"current_steps": 1010, "total_steps": 2024, "loss": 0.0, "lr": 2.9440276879233197e-05, "epoch": 0.9975308641975309, "percentage": 49.9, "elapsed_time": "1 day, 5:15:42", "remaining_time": "1 day, 5:22:39"} +{"current_steps": 1020, "total_steps": 2024, "loss": 0.0, "lr": 2.9015194040952105e-05, "epoch": 1.0069135802469136, "percentage": 50.4, "elapsed_time": "1 day, 5:31:24", "remaining_time": "1 day, 5:03:37"} +{"current_steps": 1030, "total_steps": 2024, "loss": 0.0, "lr": 2.858891618144246e-05, "epoch": 1.0167901234567902, "percentage": 50.89, "elapsed_time": "1 day, 5:47:23", "remaining_time": "1 day, 4:44:55"} +{"current_steps": 1040, "total_steps": 2024, "loss": 0.0, "lr": 2.8161570171556867e-05, "epoch": 1.0266666666666666, "percentage": 51.38, "elapsed_time": "1 day, 6:03:50", "remaining_time": "1 day, 4:26:42"} +{"current_steps": 1050, "total_steps": 2024, "loss": 0.0, "lr": 2.7733283200055966e-05, "epoch": 1.0365432098765432, "percentage": 51.88, "elapsed_time": "1 day, 6:20:14", "remaining_time": "1 day, 4:08:29"} +{"current_steps": 1060, "total_steps": 2024, "loss": 0.0, "lr": 2.7304182735753864e-05, "epoch": 1.0464197530864197, "percentage": 52.37, "elapsed_time": "1 day, 6:36:22", "remaining_time": "1 day, 3:50:03"} +{"current_steps": 1070, "total_steps": 2024, "loss": 0.0, "lr": 2.68743964895803e-05, "epoch": 1.0562962962962963, "percentage": 52.87, "elapsed_time": "1 day, 6:52:20", "remaining_time": "1 day, 3:31:31"} +{"current_steps": 1080, "total_steps": 2024, "loss": 0.0, "lr": 2.6444052376570677e-05, "epoch": 1.066172839506173, "percentage": 53.36, "elapsed_time": "1 day, 7:08:39", "remaining_time": "1 day, 3:13:20"} +{"current_steps": 1090, "total_steps": 2024, "loss": 0.0, "lr": 2.60132784777954e-05, "epoch": 1.0760493827160493, "percentage": 53.85, "elapsed_time": "1 day, 7:24:21", "remaining_time": "1 day, 2:54:40"} +{"current_steps": 1100, "total_steps": 2024, "loss": 0.0, "lr": 2.5582203002239757e-05, "epoch": 1.085925925925926, "percentage": 54.35, "elapsed_time": "1 day, 7:40:31", "remaining_time": "1 day, 2:36:26"} +{"current_steps": 1110, "total_steps": 2024, "loss": 0.0, "lr": 2.515095424864577e-05, "epoch": 1.0958024691358024, "percentage": 54.84, "elapsed_time": "1 day, 7:57:02", "remaining_time": "1 day, 2:18:32"} +{"current_steps": 1120, "total_steps": 2024, "loss": 0.0, "lr": 2.471966056732728e-05, "epoch": 1.105679012345679, "percentage": 55.34, "elapsed_time": "1 day, 8:13:07", "remaining_time": "1 day, 2:00:18"} +{"current_steps": 1130, "total_steps": 2024, "loss": 0.0, "lr": 2.4288450321969752e-05, "epoch": 1.1155555555555556, "percentage": 55.83, "elapsed_time": "1 day, 8:29:02", "remaining_time": "1 day, 1:41:58"} +{"current_steps": 1140, "total_steps": 2024, "loss": 0.0, "lr": 2.385745185142603e-05, "epoch": 1.125432098765432, "percentage": 56.32, "elapsed_time": "1 day, 8:45:35", "remaining_time": "1 day, 1:24:11"} +{"current_steps": 1150, "total_steps": 2024, "loss": 0.0, "lr": 2.3426793431519524e-05, "epoch": 1.1353086419753087, "percentage": 56.82, "elapsed_time": "1 day, 9:01:29", "remaining_time": "1 day, 1:05:56"} +{"current_steps": 1160, "total_steps": 2024, "loss": 0.0, "lr": 2.2996603236866168e-05, "epoch": 1.145185185185185, "percentage": 57.31, "elapsed_time": "1 day, 9:17:32", "remaining_time": "1 day, 0:47:49"} +{"current_steps": 1170, "total_steps": 2024, "loss": 0.0, "lr": 2.2567009302726442e-05, "epoch": 1.1550617283950617, "percentage": 57.81, "elapsed_time": "1 day, 9:33:24", "remaining_time": "1 day, 0:29:37"} +{"current_steps": 1180, "total_steps": 2024, "loss": 0.0, "lr": 2.2138139486898916e-05, "epoch": 1.1649382716049383, "percentage": 58.3, "elapsed_time": "1 day, 9:49:23", "remaining_time": "1 day, 0:11:31"} +{"current_steps": 1190, "total_steps": 2024, "loss": 0.0002, "lr": 2.171012143166663e-05, "epoch": 1.1748148148148148, "percentage": 58.79, "elapsed_time": "1 day, 10:05:34", "remaining_time": "23:53:37"} +{"current_steps": 1200, "total_steps": 2024, "loss": 0.0, "lr": 2.1283082525807554e-05, "epoch": 1.1846913580246914, "percentage": 59.29, "elapsed_time": "1 day, 10:21:51", "remaining_time": "23:35:48"} +{"current_steps": 1210, "total_steps": 2024, "loss": 0.0, "lr": 2.0857149866680555e-05, "epoch": 1.194567901234568, "percentage": 59.78, "elapsed_time": "1 day, 10:38:18", "remaining_time": "23:18:08"} +{"current_steps": 1220, "total_steps": 2024, "loss": 0.0, "lr": 2.043245022239806e-05, "epoch": 1.2044444444444444, "percentage": 60.28, "elapsed_time": "1 day, 10:53:59", "remaining_time": "22:59:58"} +{"current_steps": 1230, "total_steps": 2024, "loss": 0.0, "lr": 2.000910999409672e-05, "epoch": 1.214320987654321, "percentage": 60.77, "elapsed_time": "1 day, 11:10:31", "remaining_time": "22:42:23"} +{"current_steps": 1240, "total_steps": 2024, "loss": 0.0, "lr": 1.9587255178317327e-05, "epoch": 1.2241975308641975, "percentage": 61.26, "elapsed_time": "1 day, 11:27:07", "remaining_time": "22:24:53"} +{"current_steps": 1250, "total_steps": 2024, "loss": 0.0015, "lr": 1.9167011329505064e-05, "epoch": 1.234074074074074, "percentage": 61.76, "elapsed_time": "1 day, 11:43:54", "remaining_time": "22:07:30"} +{"current_steps": 1260, "total_steps": 2024, "loss": 0.0, "lr": 1.8748503522641487e-05, "epoch": 1.2439506172839505, "percentage": 62.25, "elapsed_time": "1 day, 12:00:07", "remaining_time": "21:49:47"} +{"current_steps": 1270, "total_steps": 2024, "loss": 0.0, "lr": 1.8331856316019024e-05, "epoch": 1.2538271604938271, "percentage": 62.75, "elapsed_time": "1 day, 12:15:37", "remaining_time": "21:31:40"} +{"current_steps": 1280, "total_steps": 2024, "loss": 0.0, "lr": 1.791719371416936e-05, "epoch": 1.2637037037037038, "percentage": 63.24, "elapsed_time": "1 day, 12:31:55", "remaining_time": "21:14:03"} +{"current_steps": 1290, "total_steps": 2024, "loss": 0.0, "lr": 1.7504639130956652e-05, "epoch": 1.2735802469135802, "percentage": 63.74, "elapsed_time": "1 day, 12:47:44", "remaining_time": "20:56:11"} +{"current_steps": 1300, "total_steps": 2024, "loss": 0.0, "lr": 1.7094315352846473e-05, "epoch": 1.2834567901234568, "percentage": 64.23, "elapsed_time": "1 day, 13:04:11", "remaining_time": "20:38:41"} +{"current_steps": 1310, "total_steps": 2024, "loss": 0.0, "lr": 1.6686344502361516e-05, "epoch": 1.2933333333333334, "percentage": 64.72, "elapsed_time": "1 day, 13:20:14", "remaining_time": "20:21:00"} +{"current_steps": 1320, "total_steps": 2024, "loss": 0.0, "lr": 1.6280848001734943e-05, "epoch": 1.3032098765432099, "percentage": 65.22, "elapsed_time": "1 day, 13:36:40", "remaining_time": "20:03:33"} +{"current_steps": 1330, "total_steps": 2024, "loss": 0.0, "lr": 1.5877946536772065e-05, "epoch": 1.3130864197530865, "percentage": 65.71, "elapsed_time": "1 day, 13:52:39", "remaining_time": "19:45:52"} +{"current_steps": 1340, "total_steps": 2024, "loss": 0.0, "lr": 1.5477760020931302e-05, "epoch": 1.322962962962963, "percentage": 66.21, "elapsed_time": "1 day, 14:08:09", "remaining_time": "19:27:58"} +{"current_steps": 1350, "total_steps": 2024, "loss": 0.0, "lr": 1.5080407559634929e-05, "epoch": 1.3328395061728395, "percentage": 66.7, "elapsed_time": "1 day, 14:25:03", "remaining_time": "19:10:49"} +{"current_steps": 1360, "total_steps": 2024, "loss": 0.0001, "lr": 1.468600741482038e-05, "epoch": 1.342716049382716, "percentage": 67.19, "elapsed_time": "1 day, 14:40:59", "remaining_time": "18:53:11"} +{"current_steps": 1370, "total_steps": 2024, "loss": 0.0, "lr": 1.4294676969742571e-05, "epoch": 1.3525925925925926, "percentage": 67.69, "elapsed_time": "1 day, 14:57:11", "remaining_time": "18:35:42"} +{"current_steps": 1380, "total_steps": 2024, "loss": 0.0, "lr": 1.390653269403771e-05, "epoch": 1.3624691358024692, "percentage": 68.18, "elapsed_time": "1 day, 15:13:06", "remaining_time": "18:18:07"} +{"current_steps": 1390, "total_steps": 2024, "loss": 0.0017, "lr": 1.3521690109059062e-05, "epoch": 1.3723456790123456, "percentage": 68.68, "elapsed_time": "1 day, 15:29:49", "remaining_time": "18:00:54"} +{"current_steps": 1400, "total_steps": 2024, "loss": 0.0, "lr": 1.3140263753494903e-05, "epoch": 1.3822222222222222, "percentage": 69.17, "elapsed_time": "1 day, 15:45:48", "remaining_time": "17:43:23"} +{"current_steps": 1410, "total_steps": 2024, "loss": 0.0, "lr": 1.276236714927902e-05, "epoch": 1.3920987654320989, "percentage": 69.66, "elapsed_time": "1 day, 16:02:16", "remaining_time": "17:26:05"} +{"current_steps": 1420, "total_steps": 2024, "loss": 0.0, "lr": 1.2388112767803729e-05, "epoch": 1.4019753086419753, "percentage": 70.16, "elapsed_time": "1 day, 16:18:22", "remaining_time": "17:08:39"} +{"current_steps": 1430, "total_steps": 2024, "loss": 0.0, "lr": 1.2017611996445644e-05, "epoch": 1.411851851851852, "percentage": 70.65, "elapsed_time": "1 day, 16:34:38", "remaining_time": "16:51:18"} +{"current_steps": 1440, "total_steps": 2024, "loss": 0.0, "lr": 1.1650975105413981e-05, "epoch": 1.4217283950617283, "percentage": 71.15, "elapsed_time": "1 day, 16:51:05", "remaining_time": "16:34:03"} +{"current_steps": 1450, "total_steps": 2024, "loss": 0.0, "lr": 1.1288311214931446e-05, "epoch": 1.431604938271605, "percentage": 71.64, "elapsed_time": "1 day, 17:07:01", "remaining_time": "16:16:36"} +{"current_steps": 1460, "total_steps": 2024, "loss": 0.0, "lr": 1.092972826275735e-05, "epoch": 1.4414814814814814, "percentage": 72.13, "elapsed_time": "1 day, 17:23:05", "remaining_time": "15:59:13"} +{"current_steps": 1470, "total_steps": 2024, "loss": 0.0, "lr": 1.057533297206263e-05, "epoch": 1.451358024691358, "percentage": 72.63, "elapsed_time": "1 day, 17:39:24", "remaining_time": "15:41:57"} +{"current_steps": 1480, "total_steps": 2024, "loss": 0.0, "lr": 1.0225230819666431e-05, "epoch": 1.4612345679012346, "percentage": 73.12, "elapsed_time": "1 day, 17:55:20", "remaining_time": "15:24:33"} +{"current_steps": 1490, "total_steps": 2024, "loss": 0.0, "lr": 9.879526004643586e-06, "epoch": 1.471111111111111, "percentage": 73.62, "elapsed_time": "1 day, 18:11:39", "remaining_time": "15:07:19"} +{"current_steps": 1500, "total_steps": 2024, "loss": 0.0, "lr": 9.538321417312351e-06, "epoch": 1.4809876543209877, "percentage": 74.11, "elapsed_time": "1 day, 18:27:34", "remaining_time": "14:49:57"} +{"current_steps": 1500, "total_steps": 2024, "epoch": 1.4809876543209877, "percentage": 74.11, "elapsed_time": "1 day, 19:25:05", "remaining_time": "15:10:02"} +{"current_steps": 1510, "total_steps": 2024, "loss": 0.0, "lr": 9.201718608611729e-06, "epoch": 1.4908641975308643, "percentage": 74.6, "elapsed_time": "1 day, 19:40:37", "remaining_time": "14:52:03"} +{"current_steps": 1520, "total_steps": 2024, "loss": 0.0, "lr": 8.869817759877388e-06, "epoch": 1.5007407407407407, "percentage": 75.1, "elapsed_time": "1 day, 19:56:30", "remaining_time": "14:34:12"} +{"current_steps": 1530, "total_steps": 2024, "loss": 0.0, "lr": 8.542717653025211e-06, "epoch": 1.5106172839506171, "percentage": 75.59, "elapsed_time": "1 day, 20:12:55", "remaining_time": "14:16:33"} +{"current_steps": 1540, "total_steps": 2024, "loss": 0.0, "lr": 8.220515641151359e-06, "epoch": 1.520493827160494, "percentage": 76.09, "elapsed_time": "1 day, 20:29:09", "remaining_time": "13:58:52"} +{"current_steps": 1550, "total_steps": 2024, "loss": 0.0, "lr": 7.90330761955756e-06, "epoch": 1.5303703703703704, "percentage": 76.58, "elapsed_time": "1 day, 20:45:10", "remaining_time": "13:41:08"} +{"current_steps": 1560, "total_steps": 2024, "loss": 0.0, "lr": 7.591187997210305e-06, "epoch": 1.5402469135802468, "percentage": 77.08, "elapsed_time": "1 day, 21:01:33", "remaining_time": "13:23:32"} +{"current_steps": 1570, "total_steps": 2024, "loss": 0.0, "lr": 7.284249668642404e-06, "epoch": 1.5501234567901234, "percentage": 77.57, "elapsed_time": "1 day, 21:17:44", "remaining_time": "13:05:53"} +{"current_steps": 1580, "total_steps": 2024, "loss": 0.0, "lr": 6.9825839863052554e-06, "epoch": 1.56, "percentage": 78.06, "elapsed_time": "1 day, 21:33:59", "remaining_time": "12:48:17"} +{"current_steps": 1590, "total_steps": 2024, "loss": 0.0, "lr": 6.686280733380107e-06, "epoch": 1.5698765432098765, "percentage": 78.56, "elapsed_time": "1 day, 21:50:19", "remaining_time": "12:30:42"} +{"current_steps": 1600, "total_steps": 2024, "loss": 0.0, "lr": 6.395428097056349e-06, "epoch": 1.579753086419753, "percentage": 79.05, "elapsed_time": "1 day, 22:06:13", "remaining_time": "12:13:02"} +{"current_steps": 1610, "total_steps": 2024, "loss": 0.0, "lr": 6.110112642284829e-06, "epoch": 1.5896296296296297, "percentage": 79.55, "elapsed_time": "1 day, 22:21:38", "remaining_time": "11:55:16"} +{"current_steps": 1620, "total_steps": 2024, "loss": 0.0, "lr": 5.830419286013969e-06, "epoch": 1.5995061728395061, "percentage": 80.04, "elapsed_time": "1 day, 22:37:59", "remaining_time": "11:37:46"} +{"current_steps": 1630, "total_steps": 2024, "loss": 0.0, "lr": 5.5564312719163875e-06, "epoch": 1.6093827160493828, "percentage": 80.53, "elapsed_time": "1 day, 22:54:19", "remaining_time": "11:20:16"} +{"current_steps": 1640, "total_steps": 2024, "loss": 0.0, "lr": 5.288230145613498e-06, "epoch": 1.6192592592592594, "percentage": 81.03, "elapsed_time": "1 day, 23:10:42", "remaining_time": "11:02:48"} +{"current_steps": 1650, "total_steps": 2024, "loss": 0.0, "lr": 5.025895730405566e-06, "epoch": 1.6291358024691358, "percentage": 81.52, "elapsed_time": "1 day, 23:26:58", "remaining_time": "10:45:18"} +{"current_steps": 1660, "total_steps": 2024, "loss": 0.0, "lr": 4.769506103514232e-06, "epoch": 1.6390123456790122, "percentage": 82.02, "elapsed_time": "1 day, 23:43:06", "remaining_time": "10:27:48"} +{"current_steps": 1670, "total_steps": 2024, "loss": 0.0, "lr": 4.51913757284487e-06, "epoch": 1.6488888888888888, "percentage": 82.51, "elapsed_time": "1 day, 23:58:52", "remaining_time": "10:10:15"} +{"current_steps": 1680, "total_steps": 2024, "loss": 0.0, "lr": 4.274864654275438e-06, "epoch": 1.6587654320987655, "percentage": 83.0, "elapsed_time": "2 days, 0:15:24", "remaining_time": "9:52:52"} +{"current_steps": 1690, "total_steps": 2024, "loss": 0.0, "lr": 4.036760049478697e-06, "epoch": 1.668641975308642, "percentage": 83.5, "elapsed_time": "2 days, 0:31:31", "remaining_time": "9:35:24"} +{"current_steps": 1700, "total_steps": 2024, "loss": 0.0, "lr": 3.8048946242843995e-06, "epoch": 1.6785185185185185, "percentage": 83.99, "elapsed_time": "2 days, 0:47:35", "remaining_time": "9:17:57"} +{"current_steps": 1710, "total_steps": 2024, "loss": 0.0, "lr": 3.5793373875878806e-06, "epoch": 1.6883950617283952, "percentage": 84.49, "elapsed_time": "2 days, 1:04:00", "remaining_time": "9:00:35"} +{"current_steps": 1720, "total_steps": 2024, "loss": 0.0, "lr": 3.3601554708112713e-06, "epoch": 1.6982716049382716, "percentage": 84.98, "elapsed_time": "2 days, 1:20:08", "remaining_time": "8:43:11"} +{"current_steps": 1730, "total_steps": 2024, "loss": 0.0, "lr": 3.14741410792353e-06, "epoch": 1.7081481481481482, "percentage": 85.47, "elapsed_time": "2 days, 1:35:57", "remaining_time": "8:25:44"} +{"current_steps": 1740, "total_steps": 2024, "loss": 0.0006, "lr": 2.941176616025215e-06, "epoch": 1.7180246913580248, "percentage": 85.97, "elapsed_time": "2 days, 1:52:17", "remaining_time": "8:08:23"} +{"current_steps": 1750, "total_steps": 2024, "loss": 0.0, "lr": 2.741504376503737e-06, "epoch": 1.7279012345679012, "percentage": 86.46, "elapsed_time": "2 days, 2:08:22", "remaining_time": "7:51:01"} +{"current_steps": 1760, "total_steps": 2024, "loss": 0.0, "lr": 2.5484568167647245e-06, "epoch": 1.7377777777777776, "percentage": 86.96, "elapsed_time": "2 days, 2:24:45", "remaining_time": "7:33:42"} +{"current_steps": 1770, "total_steps": 2024, "loss": 0.0, "lr": 2.362091392544985e-06, "epoch": 1.7476543209876543, "percentage": 87.45, "elapsed_time": "2 days, 2:40:57", "remaining_time": "7:16:23"} +{"current_steps": 1780, "total_steps": 2024, "loss": 0.0, "lr": 2.1824635708122267e-06, "epoch": 1.757530864197531, "percentage": 87.94, "elapsed_time": "2 days, 2:57:07", "remaining_time": "6:59:04"} +{"current_steps": 1790, "total_steps": 2024, "loss": 0.0, "lr": 2.0096268132567183e-06, "epoch": 1.7674074074074073, "percentage": 88.44, "elapsed_time": "2 days, 3:13:27", "remaining_time": "6:41:46"} +{"current_steps": 1800, "total_steps": 2024, "loss": 0.0, "lr": 1.843632560379785e-06, "epoch": 1.777283950617284, "percentage": 88.93, "elapsed_time": "2 days, 3:29:23", "remaining_time": "6:24:27"} +{"current_steps": 1810, "total_steps": 2024, "loss": 0.0002, "lr": 1.684530216183805e-06, "epoch": 1.7871604938271606, "percentage": 89.43, "elapsed_time": "2 days, 3:45:26", "remaining_time": "6:07:09"} +{"current_steps": 1820, "total_steps": 2024, "loss": 0.0, "lr": 1.5323671334684042e-06, "epoch": 1.797037037037037, "percentage": 89.92, "elapsed_time": "2 days, 4:01:45", "remaining_time": "5:49:54"} +{"current_steps": 1830, "total_steps": 2024, "loss": 0.0, "lr": 1.3871885997370464e-06, "epoch": 1.8069135802469136, "percentage": 90.42, "elapsed_time": "2 days, 4:18:17", "remaining_time": "5:32:41"} +{"current_steps": 1840, "total_steps": 2024, "loss": 0.0, "lr": 1.2490378237183658e-06, "epoch": 1.8167901234567903, "percentage": 90.91, "elapsed_time": "2 days, 4:34:11", "remaining_time": "5:15:25"} +{"current_steps": 1850, "total_steps": 2024, "loss": 0.0, "lr": 1.1179559225061809e-06, "epoch": 1.8266666666666667, "percentage": 91.4, "elapsed_time": "2 days, 4:50:44", "remaining_time": "4:58:13"} +{"current_steps": 1860, "total_steps": 2024, "loss": 0.0, "lr": 9.93981909322031e-07, "epoch": 1.836543209876543, "percentage": 91.9, "elapsed_time": "2 days, 5:06:41", "remaining_time": "4:40:58"} +{"current_steps": 1870, "total_steps": 2024, "loss": 0.0, "lr": 8.771526819038644e-07, "epoch": 1.8464197530864197, "percentage": 92.39, "elapsed_time": "2 days, 5:23:07", "remaining_time": "4:23:47"} +{"current_steps": 1880, "total_steps": 2024, "loss": 0.0, "lr": 7.675030115243676e-07, "epoch": 1.8562962962962963, "percentage": 92.89, "elapsed_time": "2 days, 5:39:52", "remaining_time": "4:06:37"} +{"current_steps": 1890, "total_steps": 2024, "loss": 0.0, "lr": 6.650655326421646e-07, "epoch": 1.8661728395061727, "percentage": 93.38, "elapsed_time": "2 days, 5:56:43", "remaining_time": "3:49:28"} +{"current_steps": 1900, "total_steps": 2024, "loss": 0.0, "lr": 5.698707331890013e-07, "epoch": 1.8760493827160494, "percentage": 93.87, "elapsed_time": "2 days, 6:13:13", "remaining_time": "3:32:18"} +{"current_steps": 1910, "total_steps": 2024, "loss": 0.0, "lr": 4.819469454957787e-07, "epoch": 1.885925925925926, "percentage": 94.37, "elapsed_time": "2 days, 6:29:58", "remaining_time": "3:15:10"} +{"current_steps": 1920, "total_steps": 2024, "loss": 0.0028, "lr": 4.013203378601449e-07, "epoch": 1.8958024691358024, "percentage": 94.86, "elapsed_time": "2 days, 6:46:18", "remaining_time": "2:58:00"} +{"current_steps": 1930, "total_steps": 2024, "loss": 0.0, "lr": 3.2801490675817283e-07, "epoch": 1.905679012345679, "percentage": 95.36, "elapsed_time": "2 days, 7:02:31", "remaining_time": "2:40:50"} +{"current_steps": 1940, "total_steps": 2024, "loss": 0.0, "lr": 2.6205246970239514e-07, "epoch": 1.9155555555555557, "percentage": 95.85, "elapsed_time": "2 days, 7:19:18", "remaining_time": "2:23:43"} +{"current_steps": 1950, "total_steps": 2024, "loss": 0.0, "lr": 2.0345265874839593e-07, "epoch": 1.925432098765432, "percentage": 96.34, "elapsed_time": "2 days, 7:35:29", "remaining_time": "2:06:34"} +{"current_steps": 1960, "total_steps": 2024, "loss": 0.0, "lr": 1.522329146518009e-07, "epoch": 1.9353086419753085, "percentage": 96.84, "elapsed_time": "2 days, 7:51:34", "remaining_time": "1:49:26"} +{"current_steps": 1970, "total_steps": 2024, "loss": 0.0, "lr": 1.0840848167749385e-07, "epoch": 1.9451851851851854, "percentage": 97.33, "elapsed_time": "2 days, 8:07:44", "remaining_time": "1:32:18"} +{"current_steps": 1980, "total_steps": 2024, "loss": 0.0, "lr": 7.199240306254296e-08, "epoch": 1.9550617283950618, "percentage": 97.83, "elapsed_time": "2 days, 8:24:17", "remaining_time": "1:15:12"} +{"current_steps": 1990, "total_steps": 2024, "loss": 0.0, "lr": 4.299551713420058e-08, "epoch": 1.9649382716049382, "percentage": 98.32, "elapsed_time": "2 days, 8:40:25", "remaining_time": "0:58:05"} +{"current_steps": 2000, "total_steps": 2024, "loss": 0.0, "lr": 2.1426454084153003e-08, "epoch": 1.9748148148148148, "percentage": 98.81, "elapsed_time": "2 days, 8:56:44", "remaining_time": "0:41:00"} +{"current_steps": 2000, "total_steps": 2024, "epoch": 1.9748148148148148, "percentage": 98.81, "elapsed_time": "2 days, 9:54:15", "remaining_time": "0:41:41"} +{"current_steps": 2010, "total_steps": 2024, "loss": 0.0, "lr": 7.2916333999722975e-09, "epoch": 1.9846913580246914, "percentage": 99.31, "elapsed_time": "2 days, 10:10:51", "remaining_time": "0:24:18"} +{"current_steps": 2020, "total_steps": 2024, "loss": 0.0, "lr": 5.952619545002147e-10, "epoch": 1.9945679012345678, "percentage": 99.8, "elapsed_time": "2 days, 10:26:58", "remaining_time": "0:06:56"} +{"current_steps": 2024, "total_steps": 2024, "epoch": 1.9985185185185186, "percentage": 100.0, "elapsed_time": "2 days, 10:33:39", "remaining_time": "0:00:00"} diff --git a/Llama-3.1-8B-Instruct-lora-2/trainer_state.json b/Llama-3.1-8B-Instruct-lora-2/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0b8f3fb66d8c595fb688befbe56141d7ec50480f --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/trainer_state.json @@ -0,0 +1,1493 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9985185185185186, + "eval_steps": 500, + "global_step": 2024, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.009876543209876543, + "grad_norm": 8.173213958740234, + "learning_rate": 2.4630541871921186e-06, + "loss": 1.3879, + "step": 10 + }, + { + "epoch": 0.019753086419753086, + "grad_norm": 7.614120960235596, + "learning_rate": 4.926108374384237e-06, + "loss": 1.3407, + "step": 20 + }, + { + "epoch": 0.02962962962962963, + "grad_norm": 6.4321064949035645, + "learning_rate": 7.3891625615763555e-06, + "loss": 1.1236, + "step": 30 + }, + { + "epoch": 0.03950617283950617, + "grad_norm": 2.8025548458099365, + "learning_rate": 9.852216748768475e-06, + "loss": 0.5763, + "step": 40 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 0.7712829113006592, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.1632, + "step": 50 + }, + { + "epoch": 0.05925925925925926, + "grad_norm": 0.35205233097076416, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.0571, + "step": 60 + }, + { + "epoch": 0.0691358024691358, + "grad_norm": 0.9003845453262329, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.0374, + "step": 70 + }, + { + "epoch": 0.07901234567901234, + "grad_norm": 1.4704982042312622, + "learning_rate": 1.970443349753695e-05, + "loss": 0.0226, + "step": 80 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 2.574737310409546, + "learning_rate": 2.2167487684729066e-05, + "loss": 0.0129, + "step": 90 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 5.429731369018555, + "learning_rate": 2.4630541871921184e-05, + "loss": 0.0239, + "step": 100 + }, + { + "epoch": 0.10864197530864197, + "grad_norm": 0.17149314284324646, + "learning_rate": 2.70935960591133e-05, + "loss": 0.0102, + "step": 110 + }, + { + "epoch": 0.11851851851851852, + "grad_norm": 0.3575906753540039, + "learning_rate": 2.9556650246305422e-05, + "loss": 0.0089, + "step": 120 + }, + { + "epoch": 0.12839506172839507, + "grad_norm": 0.30634525418281555, + "learning_rate": 3.2019704433497536e-05, + "loss": 0.0028, + "step": 130 + }, + { + "epoch": 0.1382716049382716, + "grad_norm": 0.006271605845540762, + "learning_rate": 3.4482758620689657e-05, + "loss": 0.0006, + "step": 140 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 0.01288707833737135, + "learning_rate": 3.694581280788178e-05, + "loss": 0.0008, + "step": 150 + }, + { + "epoch": 0.1580246913580247, + "grad_norm": 0.004254512023180723, + "learning_rate": 3.94088669950739e-05, + "loss": 0.0008, + "step": 160 + }, + { + "epoch": 0.16790123456790124, + "grad_norm": 0.017511000856757164, + "learning_rate": 4.187192118226601e-05, + "loss": 0.0051, + "step": 170 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.001938911504112184, + "learning_rate": 4.433497536945813e-05, + "loss": 0.0002, + "step": 180 + }, + { + "epoch": 0.18765432098765433, + "grad_norm": 0.5761703848838806, + "learning_rate": 4.679802955665025e-05, + "loss": 0.0005, + "step": 190 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 0.0074734254740178585, + "learning_rate": 4.926108374384237e-05, + "loss": 0.0003, + "step": 200 + }, + { + "epoch": 0.2074074074074074, + "grad_norm": 0.0016212465707212687, + "learning_rate": 4.9998177025185267e-05, + "loss": 0.0011, + "step": 210 + }, + { + "epoch": 0.21728395061728395, + "grad_norm": 0.0033307652920484543, + "learning_rate": 4.998924880895427e-05, + "loss": 0.0009, + "step": 220 + }, + { + "epoch": 0.2271604938271605, + "grad_norm": 0.009896264411509037, + "learning_rate": 4.997288317313464e-05, + "loss": 0.001, + "step": 230 + }, + { + "epoch": 0.23703703703703705, + "grad_norm": 0.00731613440439105, + "learning_rate": 4.994908498854508e-05, + "loss": 0.0021, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 0.01958666928112507, + "learning_rate": 4.991786133811494e-05, + "loss": 0.0024, + "step": 250 + }, + { + "epoch": 0.25679012345679014, + "grad_norm": 0.008584077470004559, + "learning_rate": 4.9879221514776196e-05, + "loss": 0.0002, + "step": 260 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.0017416627379134297, + "learning_rate": 4.983317701869765e-05, + "loss": 0.0001, + "step": 270 + }, + { + "epoch": 0.2765432098765432, + "grad_norm": 0.8311202526092529, + "learning_rate": 4.977974155386214e-05, + "loss": 0.0014, + "step": 280 + }, + { + "epoch": 0.28641975308641976, + "grad_norm": 0.06173387169837952, + "learning_rate": 4.9718931023987926e-05, + "loss": 0.0043, + "step": 290 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 0.011783963069319725, + "learning_rate": 4.9650763527795385e-05, + "loss": 0.0032, + "step": 300 + }, + { + "epoch": 0.30617283950617286, + "grad_norm": 0.0035617423709481955, + "learning_rate": 4.9575259353620305e-05, + "loss": 0.0067, + "step": 310 + }, + { + "epoch": 0.3160493827160494, + "grad_norm": 0.007925956510007381, + "learning_rate": 4.949244097337567e-05, + "loss": 0.002, + "step": 320 + }, + { + "epoch": 0.32592592592592595, + "grad_norm": 0.00402588676661253, + "learning_rate": 4.9402333035863344e-05, + "loss": 0.0005, + "step": 330 + }, + { + "epoch": 0.3358024691358025, + "grad_norm": 0.00444524921476841, + "learning_rate": 4.930496235943811e-05, + "loss": 0.0001, + "step": 340 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 0.004133024252951145, + "learning_rate": 4.9200357924025755e-05, + "loss": 0.001, + "step": 350 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.0026975509244948626, + "learning_rate": 4.9088550862497966e-05, + "loss": 0.0042, + "step": 360 + }, + { + "epoch": 0.3654320987654321, + "grad_norm": 0.0009604657534509897, + "learning_rate": 4.8969574451406445e-05, + "loss": 0.0004, + "step": 370 + }, + { + "epoch": 0.37530864197530867, + "grad_norm": 0.0007526307599619031, + "learning_rate": 4.8843464101078924e-05, + "loss": 0.0005, + "step": 380 + }, + { + "epoch": 0.3851851851851852, + "grad_norm": 0.034833673387765884, + "learning_rate": 4.871025734508022e-05, + "loss": 0.0002, + "step": 390 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 0.0006839093985036016, + "learning_rate": 4.85699938290413e-05, + "loss": 0.0011, + "step": 400 + }, + { + "epoch": 0.4049382716049383, + "grad_norm": 0.17423595488071442, + "learning_rate": 4.842271529885978e-05, + "loss": 0.0002, + "step": 410 + }, + { + "epoch": 0.4148148148148148, + "grad_norm": 0.0021370204631239176, + "learning_rate": 4.8268465588275235e-05, + "loss": 0.0021, + "step": 420 + }, + { + "epoch": 0.4246913580246914, + "grad_norm": 0.002740511205047369, + "learning_rate": 4.8107290605823306e-05, + "loss": 0.0007, + "step": 430 + }, + { + "epoch": 0.4345679012345679, + "grad_norm": 0.5583088397979736, + "learning_rate": 4.79392383211721e-05, + "loss": 0.001, + "step": 440 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.0016133144963532686, + "learning_rate": 4.776435875084526e-05, + "loss": 0.0001, + "step": 450 + }, + { + "epoch": 0.454320987654321, + "grad_norm": 0.008442943915724754, + "learning_rate": 4.7582703943335785e-05, + "loss": 0.0001, + "step": 460 + }, + { + "epoch": 0.4641975308641975, + "grad_norm": 0.0007343398174270988, + "learning_rate": 4.739432796361515e-05, + "loss": 0.0003, + "step": 470 + }, + { + "epoch": 0.4740740740740741, + "grad_norm": 0.0006774268113076687, + "learning_rate": 4.719928687704218e-05, + "loss": 0.0, + "step": 480 + }, + { + "epoch": 0.4839506172839506, + "grad_norm": 0.0005065005389042199, + "learning_rate": 4.699763873267667e-05, + "loss": 0.0, + "step": 490 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 0.0007372256950475276, + "learning_rate": 4.678944354600249e-05, + "loss": 0.0002, + "step": 500 + }, + { + "epoch": 0.49382716049382713, + "eval_nlpcc25_task1_dev_accuracy": 0.9935714285714285, + "eval_nlpcc25_task1_dev_loss": 0.038043826818466187, + "eval_nlpcc25_task1_dev_runtime": 3452.1571, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 500 + }, + { + "epoch": 0.5037037037037037, + "grad_norm": 0.00031303250580094755, + "learning_rate": 4.65747632810655e-05, + "loss": 0.0002, + "step": 510 + }, + { + "epoch": 0.5135802469135803, + "grad_norm": 0.0012328416341915727, + "learning_rate": 4.635366183203157e-05, + "loss": 0.0101, + "step": 520 + }, + { + "epoch": 0.5234567901234568, + "grad_norm": 0.0003338649112265557, + "learning_rate": 4.612620500417001e-05, + "loss": 0.0, + "step": 530 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.00036818900844082236, + "learning_rate": 4.589246049426835e-05, + "loss": 0.0, + "step": 540 + }, + { + "epoch": 0.5432098765432098, + "grad_norm": 0.0008916526567190886, + "learning_rate": 4.565249787048408e-05, + "loss": 0.0005, + "step": 550 + }, + { + "epoch": 0.5530864197530864, + "grad_norm": 0.001147187897004187, + "learning_rate": 4.5406388551639436e-05, + "loss": 0.0, + "step": 560 + }, + { + "epoch": 0.562962962962963, + "grad_norm": 0.00036947213811799884, + "learning_rate": 4.515420578596542e-05, + "loss": 0.0032, + "step": 570 + }, + { + "epoch": 0.5728395061728395, + "grad_norm": 0.0018035719403997064, + "learning_rate": 4.489602462930126e-05, + "loss": 0.0, + "step": 580 + }, + { + "epoch": 0.582716049382716, + "grad_norm": 0.0003090534301009029, + "learning_rate": 4.4631921922755985e-05, + "loss": 0.0, + "step": 590 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 0.0003804001025855541, + "learning_rate": 4.436197626983855e-05, + "loss": 0.0, + "step": 600 + }, + { + "epoch": 0.6024691358024692, + "grad_norm": 0.0002678770397324115, + "learning_rate": 4.4086268013063556e-05, + "loss": 0.0, + "step": 610 + }, + { + "epoch": 0.6123456790123457, + "grad_norm": 0.0003251763992011547, + "learning_rate": 4.3804879210039275e-05, + "loss": 0.0, + "step": 620 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 0.00025686624576337636, + "learning_rate": 4.351789360904527e-05, + "loss": 0.0, + "step": 630 + }, + { + "epoch": 0.6320987654320988, + "grad_norm": 0.00028061174089089036, + "learning_rate": 4.322539662410687e-05, + "loss": 0.0, + "step": 640 + }, + { + "epoch": 0.6419753086419753, + "grad_norm": 0.0002689982939045876, + "learning_rate": 4.29274753095738e-05, + "loss": 0.0005, + "step": 650 + }, + { + "epoch": 0.6518518518518519, + "grad_norm": 0.0013502618530765176, + "learning_rate": 4.262421833421069e-05, + "loss": 0.0016, + "step": 660 + }, + { + "epoch": 0.6617283950617284, + "grad_norm": 0.0007630003965459764, + "learning_rate": 4.2315715954807e-05, + "loss": 0.0007, + "step": 670 + }, + { + "epoch": 0.671604938271605, + "grad_norm": 0.0010010383557528257, + "learning_rate": 4.200205998931442e-05, + "loss": 0.0, + "step": 680 + }, + { + "epoch": 0.6814814814814815, + "grad_norm": 0.0028828333597630262, + "learning_rate": 4.1683343789519544e-05, + "loss": 0.0, + "step": 690 + }, + { + "epoch": 0.691358024691358, + "grad_norm": 0.0005191878299228847, + "learning_rate": 4.135966221326007e-05, + "loss": 0.0002, + "step": 700 + }, + { + "epoch": 0.7012345679012346, + "grad_norm": 0.0010550885926932096, + "learning_rate": 4.103111159619274e-05, + "loss": 0.0, + "step": 710 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 0.00028645008569583297, + "learning_rate": 4.0697789723121485e-05, + "loss": 0.0024, + "step": 720 + }, + { + "epoch": 0.7209876543209877, + "grad_norm": 0.00031866817153058946, + "learning_rate": 4.035979579889424e-05, + "loss": 0.0, + "step": 730 + }, + { + "epoch": 0.7308641975308642, + "grad_norm": 0.0003414931707084179, + "learning_rate": 4.001723041887713e-05, + "loss": 0.0, + "step": 740 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.00028110420680604875, + "learning_rate": 3.967019553901477e-05, + "loss": 0.0, + "step": 750 + }, + { + "epoch": 0.7506172839506173, + "grad_norm": 0.0002813187020365149, + "learning_rate": 3.931879444548568e-05, + "loss": 0.0, + "step": 760 + }, + { + "epoch": 0.7604938271604939, + "grad_norm": 0.00022827104839961976, + "learning_rate": 3.8963131723961734e-05, + "loss": 0.0, + "step": 770 + }, + { + "epoch": 0.7703703703703704, + "grad_norm": 0.00034623872488737106, + "learning_rate": 3.860331322848091e-05, + "loss": 0.0, + "step": 780 + }, + { + "epoch": 0.7802469135802469, + "grad_norm": 0.000275197351584211, + "learning_rate": 3.823944604994243e-05, + "loss": 0.0, + "step": 790 + }, + { + "epoch": 0.7901234567901234, + "grad_norm": 0.0003900310257449746, + "learning_rate": 3.7871638484233966e-05, + "loss": 0.0016, + "step": 800 + }, + { + "epoch": 0.8, + "grad_norm": 0.005876209121197462, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.0021, + "step": 810 + }, + { + "epoch": 0.8098765432098766, + "grad_norm": 0.10590516775846481, + "learning_rate": 3.71246412060613e-05, + "loss": 0.0004, + "step": 820 + }, + { + "epoch": 0.8197530864197531, + "grad_norm": 0.010082660242915154, + "learning_rate": 3.674567381849498e-05, + "loss": 0.0001, + "step": 830 + }, + { + "epoch": 0.8296296296296296, + "grad_norm": 0.0013996075140312314, + "learning_rate": 3.6363210627385004e-05, + "loss": 0.0001, + "step": 840 + }, + { + "epoch": 0.8395061728395061, + "grad_norm": 0.0020888964645564556, + "learning_rate": 3.59773654632531e-05, + "loss": 0.0001, + "step": 850 + }, + { + "epoch": 0.8493827160493828, + "grad_norm": 0.0007959024515002966, + "learning_rate": 3.558825316317998e-05, + "loss": 0.0, + "step": 860 + }, + { + "epoch": 0.8592592592592593, + "grad_norm": 0.0046511865220963955, + "learning_rate": 3.5195989536626925e-05, + "loss": 0.0, + "step": 870 + }, + { + "epoch": 0.8691358024691358, + "grad_norm": 0.3736809194087982, + "learning_rate": 3.4800691330968064e-05, + "loss": 0.0057, + "step": 880 + }, + { + "epoch": 0.8790123456790123, + "grad_norm": 0.003749604569748044, + "learning_rate": 3.440247619674347e-05, + "loss": 0.0, + "step": 890 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.0007721478468738496, + "learning_rate": 3.400146265264341e-05, + "loss": 0.0002, + "step": 900 + }, + { + "epoch": 0.8987654320987655, + "grad_norm": 0.000859771971590817, + "learning_rate": 3.359777005023428e-05, + "loss": 0.0, + "step": 910 + }, + { + "epoch": 0.908641975308642, + "grad_norm": 0.0005010567838326097, + "learning_rate": 3.3191518538436596e-05, + "loss": 0.0, + "step": 920 + }, + { + "epoch": 0.9185185185185185, + "grad_norm": 0.00021397744421847165, + "learning_rate": 3.278282902776569e-05, + "loss": 0.0, + "step": 930 + }, + { + "epoch": 0.928395061728395, + "grad_norm": 0.0004123291582800448, + "learning_rate": 3.237182315434582e-05, + "loss": 0.0001, + "step": 940 + }, + { + "epoch": 0.9382716049382716, + "grad_norm": 0.0003536223666742444, + "learning_rate": 3.195862324370812e-05, + "loss": 0.0001, + "step": 950 + }, + { + "epoch": 0.9481481481481482, + "grad_norm": 0.0007978660287335515, + "learning_rate": 3.154335227438362e-05, + "loss": 0.0, + "step": 960 + }, + { + "epoch": 0.9580246913580247, + "grad_norm": 0.0002476648660376668, + "learning_rate": 3.112613384130168e-05, + "loss": 0.0, + "step": 970 + }, + { + "epoch": 0.9679012345679012, + "grad_norm": 0.001542024314403534, + "learning_rate": 3.0707092119005155e-05, + "loss": 0.0, + "step": 980 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 0.0001304072793573141, + "learning_rate": 3.028635182469294e-05, + "loss": 0.0, + "step": 990 + }, + { + "epoch": 0.9876543209876543, + "grad_norm": 0.0019521948415786028, + "learning_rate": 2.9864038181101046e-05, + "loss": 0.0, + "step": 1000 + }, + { + "epoch": 0.9876543209876543, + "eval_nlpcc25_task1_dev_accuracy": 0.9957792207792208, + "eval_nlpcc25_task1_dev_loss": 0.025204554200172424, + "eval_nlpcc25_task1_dev_runtime": 3451.9671, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1000 + }, + { + "epoch": 0.9975308641975309, + "grad_norm": 0.0017585157183930278, + "learning_rate": 2.9440276879233197e-05, + "loss": 0.0, + "step": 1010 + }, + { + "epoch": 1.0069135802469136, + "grad_norm": 0.00013237106031738222, + "learning_rate": 2.9015194040952105e-05, + "loss": 0.0, + "step": 1020 + }, + { + "epoch": 1.0167901234567902, + "grad_norm": 0.0001692405203357339, + "learning_rate": 2.858891618144246e-05, + "loss": 0.0, + "step": 1030 + }, + { + "epoch": 1.0266666666666666, + "grad_norm": 0.00012385584705043584, + "learning_rate": 2.8161570171556867e-05, + "loss": 0.0, + "step": 1040 + }, + { + "epoch": 1.0365432098765432, + "grad_norm": 0.00046954487334005535, + "learning_rate": 2.7733283200055966e-05, + "loss": 0.0, + "step": 1050 + }, + { + "epoch": 1.0464197530864197, + "grad_norm": 0.000127663035527803, + "learning_rate": 2.7304182735753864e-05, + "loss": 0.0, + "step": 1060 + }, + { + "epoch": 1.0562962962962963, + "grad_norm": 0.00011920407268917188, + "learning_rate": 2.68743964895803e-05, + "loss": 0.0, + "step": 1070 + }, + { + "epoch": 1.066172839506173, + "grad_norm": 0.000622686231508851, + "learning_rate": 2.6444052376570677e-05, + "loss": 0.0, + "step": 1080 + }, + { + "epoch": 1.0760493827160493, + "grad_norm": 0.00012072878598701209, + "learning_rate": 2.60132784777954e-05, + "loss": 0.0, + "step": 1090 + }, + { + "epoch": 1.085925925925926, + "grad_norm": 0.00013644102727994323, + "learning_rate": 2.5582203002239757e-05, + "loss": 0.0, + "step": 1100 + }, + { + "epoch": 1.0958024691358024, + "grad_norm": 0.00014129135524854064, + "learning_rate": 2.515095424864577e-05, + "loss": 0.0, + "step": 1110 + }, + { + "epoch": 1.105679012345679, + "grad_norm": 0.00017698659212328494, + "learning_rate": 2.471966056732728e-05, + "loss": 0.0, + "step": 1120 + }, + { + "epoch": 1.1155555555555556, + "grad_norm": 0.00010272293002344668, + "learning_rate": 2.4288450321969752e-05, + "loss": 0.0, + "step": 1130 + }, + { + "epoch": 1.125432098765432, + "grad_norm": 0.00011754959268728271, + "learning_rate": 2.385745185142603e-05, + "loss": 0.0, + "step": 1140 + }, + { + "epoch": 1.1353086419753087, + "grad_norm": 0.00014159196871332824, + "learning_rate": 2.3426793431519524e-05, + "loss": 0.0, + "step": 1150 + }, + { + "epoch": 1.145185185185185, + "grad_norm": 0.00012516119750216603, + "learning_rate": 2.2996603236866168e-05, + "loss": 0.0, + "step": 1160 + }, + { + "epoch": 1.1550617283950617, + "grad_norm": 0.0001459266641177237, + "learning_rate": 2.2567009302726442e-05, + "loss": 0.0, + "step": 1170 + }, + { + "epoch": 1.1649382716049383, + "grad_norm": 0.00011655504204099998, + "learning_rate": 2.2138139486898916e-05, + "loss": 0.0, + "step": 1180 + }, + { + "epoch": 1.1748148148148148, + "grad_norm": 0.0714588537812233, + "learning_rate": 2.171012143166663e-05, + "loss": 0.0002, + "step": 1190 + }, + { + "epoch": 1.1846913580246914, + "grad_norm": 0.00010159167140955105, + "learning_rate": 2.1283082525807554e-05, + "loss": 0.0, + "step": 1200 + }, + { + "epoch": 1.194567901234568, + "grad_norm": 0.00014003751857671887, + "learning_rate": 2.0857149866680555e-05, + "loss": 0.0, + "step": 1210 + }, + { + "epoch": 1.2044444444444444, + "grad_norm": 0.00010035983723355457, + "learning_rate": 2.043245022239806e-05, + "loss": 0.0, + "step": 1220 + }, + { + "epoch": 1.214320987654321, + "grad_norm": 9.884718747343868e-05, + "learning_rate": 2.000910999409672e-05, + "loss": 0.0, + "step": 1230 + }, + { + "epoch": 1.2241975308641975, + "grad_norm": 9.416105604032055e-05, + "learning_rate": 1.9587255178317327e-05, + "loss": 0.0, + "step": 1240 + }, + { + "epoch": 1.234074074074074, + "grad_norm": 0.0001408099487889558, + "learning_rate": 1.9167011329505064e-05, + "loss": 0.0015, + "step": 1250 + }, + { + "epoch": 1.2439506172839505, + "grad_norm": 9.506545029580593e-05, + "learning_rate": 1.8748503522641487e-05, + "loss": 0.0, + "step": 1260 + }, + { + "epoch": 1.2538271604938271, + "grad_norm": 9.596488962415606e-05, + "learning_rate": 1.8331856316019024e-05, + "loss": 0.0, + "step": 1270 + }, + { + "epoch": 1.2637037037037038, + "grad_norm": 0.00010258956899633631, + "learning_rate": 1.791719371416936e-05, + "loss": 0.0, + "step": 1280 + }, + { + "epoch": 1.2735802469135802, + "grad_norm": 9.748171578394249e-05, + "learning_rate": 1.7504639130956652e-05, + "loss": 0.0, + "step": 1290 + }, + { + "epoch": 1.2834567901234568, + "grad_norm": 8.836873894324526e-05, + "learning_rate": 1.7094315352846473e-05, + "loss": 0.0, + "step": 1300 + }, + { + "epoch": 1.2933333333333334, + "grad_norm": 9.162294736597687e-05, + "learning_rate": 1.6686344502361516e-05, + "loss": 0.0, + "step": 1310 + }, + { + "epoch": 1.3032098765432099, + "grad_norm": 9.847845649346709e-05, + "learning_rate": 1.6280848001734943e-05, + "loss": 0.0, + "step": 1320 + }, + { + "epoch": 1.3130864197530865, + "grad_norm": 0.002509386744350195, + "learning_rate": 1.5877946536772065e-05, + "loss": 0.0, + "step": 1330 + }, + { + "epoch": 1.322962962962963, + "grad_norm": 9.157544263871387e-05, + "learning_rate": 1.5477760020931302e-05, + "loss": 0.0, + "step": 1340 + }, + { + "epoch": 1.3328395061728395, + "grad_norm": 0.00010282491712132469, + "learning_rate": 1.5080407559634929e-05, + "loss": 0.0, + "step": 1350 + }, + { + "epoch": 1.342716049382716, + "grad_norm": 0.13170938193798065, + "learning_rate": 1.468600741482038e-05, + "loss": 0.0001, + "step": 1360 + }, + { + "epoch": 1.3525925925925926, + "grad_norm": 9.943981422111392e-05, + "learning_rate": 1.4294676969742571e-05, + "loss": 0.0, + "step": 1370 + }, + { + "epoch": 1.3624691358024692, + "grad_norm": 8.701250771991909e-05, + "learning_rate": 1.390653269403771e-05, + "loss": 0.0, + "step": 1380 + }, + { + "epoch": 1.3723456790123456, + "grad_norm": 9.3141570687294e-05, + "learning_rate": 1.3521690109059062e-05, + "loss": 0.0017, + "step": 1390 + }, + { + "epoch": 1.3822222222222222, + "grad_norm": 0.00011883996921824291, + "learning_rate": 1.3140263753494903e-05, + "loss": 0.0, + "step": 1400 + }, + { + "epoch": 1.3920987654320989, + "grad_norm": 9.17040160857141e-05, + "learning_rate": 1.276236714927902e-05, + "loss": 0.0, + "step": 1410 + }, + { + "epoch": 1.4019753086419753, + "grad_norm": 0.00010277926048729569, + "learning_rate": 1.2388112767803729e-05, + "loss": 0.0, + "step": 1420 + }, + { + "epoch": 1.411851851851852, + "grad_norm": 8.92517709871754e-05, + "learning_rate": 1.2017611996445644e-05, + "loss": 0.0, + "step": 1430 + }, + { + "epoch": 1.4217283950617283, + "grad_norm": 8.916323713492602e-05, + "learning_rate": 1.1650975105413981e-05, + "loss": 0.0, + "step": 1440 + }, + { + "epoch": 1.431604938271605, + "grad_norm": 8.675019489601254e-05, + "learning_rate": 1.1288311214931446e-05, + "loss": 0.0, + "step": 1450 + }, + { + "epoch": 1.4414814814814814, + "grad_norm": 9.536254219710827e-05, + "learning_rate": 1.092972826275735e-05, + "loss": 0.0, + "step": 1460 + }, + { + "epoch": 1.451358024691358, + "grad_norm": 9.104371565626934e-05, + "learning_rate": 1.057533297206263e-05, + "loss": 0.0, + "step": 1470 + }, + { + "epoch": 1.4612345679012346, + "grad_norm": 9.839163249125704e-05, + "learning_rate": 1.0225230819666431e-05, + "loss": 0.0, + "step": 1480 + }, + { + "epoch": 1.471111111111111, + "grad_norm": 8.303455979330465e-05, + "learning_rate": 9.879526004643586e-06, + "loss": 0.0, + "step": 1490 + }, + { + "epoch": 1.4809876543209877, + "grad_norm": 9.03740365174599e-05, + "learning_rate": 9.538321417312351e-06, + "loss": 0.0, + "step": 1500 + }, + { + "epoch": 1.4809876543209877, + "eval_nlpcc25_task1_dev_accuracy": 0.9967857142857143, + "eval_nlpcc25_task1_dev_loss": 0.020024757832288742, + "eval_nlpcc25_task1_dev_runtime": 3451.5414, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 1500 + }, + { + "epoch": 1.4908641975308643, + "grad_norm": 8.933599019655958e-05, + "learning_rate": 9.201718608611729e-06, + "loss": 0.0, + "step": 1510 + }, + { + "epoch": 1.5007407407407407, + "grad_norm": 0.00010863789066206664, + "learning_rate": 8.869817759877388e-06, + "loss": 0.0, + "step": 1520 + }, + { + "epoch": 1.5106172839506171, + "grad_norm": 9.006269829114899e-05, + "learning_rate": 8.542717653025211e-06, + "loss": 0.0, + "step": 1530 + }, + { + "epoch": 1.520493827160494, + "grad_norm": 9.235177276423201e-05, + "learning_rate": 8.220515641151359e-06, + "loss": 0.0, + "step": 1540 + }, + { + "epoch": 1.5303703703703704, + "grad_norm": 9.809488983592018e-05, + "learning_rate": 7.90330761955756e-06, + "loss": 0.0, + "step": 1550 + }, + { + "epoch": 1.5402469135802468, + "grad_norm": 8.182202145690098e-05, + "learning_rate": 7.591187997210305e-06, + "loss": 0.0, + "step": 1560 + }, + { + "epoch": 1.5501234567901234, + "grad_norm": 8.401457307627425e-05, + "learning_rate": 7.284249668642404e-06, + "loss": 0.0, + "step": 1570 + }, + { + "epoch": 1.56, + "grad_norm": 8.956337114796042e-05, + "learning_rate": 6.9825839863052554e-06, + "loss": 0.0, + "step": 1580 + }, + { + "epoch": 1.5698765432098765, + "grad_norm": 0.00010167937580263242, + "learning_rate": 6.686280733380107e-06, + "loss": 0.0, + "step": 1590 + }, + { + "epoch": 1.579753086419753, + "grad_norm": 7.88121105870232e-05, + "learning_rate": 6.395428097056349e-06, + "loss": 0.0, + "step": 1600 + }, + { + "epoch": 1.5896296296296297, + "grad_norm": 8.662124309921637e-05, + "learning_rate": 6.110112642284829e-06, + "loss": 0.0, + "step": 1610 + }, + { + "epoch": 1.5995061728395061, + "grad_norm": 8.408601570408791e-05, + "learning_rate": 5.830419286013969e-06, + "loss": 0.0, + "step": 1620 + }, + { + "epoch": 1.6093827160493828, + "grad_norm": 8.535439701518044e-05, + "learning_rate": 5.5564312719163875e-06, + "loss": 0.0, + "step": 1630 + }, + { + "epoch": 1.6192592592592594, + "grad_norm": 9.110941755352542e-05, + "learning_rate": 5.288230145613498e-06, + "loss": 0.0, + "step": 1640 + }, + { + "epoch": 1.6291358024691358, + "grad_norm": 9.438098641112447e-05, + "learning_rate": 5.025895730405566e-06, + "loss": 0.0, + "step": 1650 + }, + { + "epoch": 1.6390123456790122, + "grad_norm": 9.488565410720184e-05, + "learning_rate": 4.769506103514232e-06, + "loss": 0.0, + "step": 1660 + }, + { + "epoch": 1.6488888888888888, + "grad_norm": 9.606828825781122e-05, + "learning_rate": 4.51913757284487e-06, + "loss": 0.0, + "step": 1670 + }, + { + "epoch": 1.6587654320987655, + "grad_norm": 8.396390330744907e-05, + "learning_rate": 4.274864654275438e-06, + "loss": 0.0, + "step": 1680 + }, + { + "epoch": 1.668641975308642, + "grad_norm": 9.46772561292164e-05, + "learning_rate": 4.036760049478697e-06, + "loss": 0.0, + "step": 1690 + }, + { + "epoch": 1.6785185185185185, + "grad_norm": 7.928520062705502e-05, + "learning_rate": 3.8048946242843995e-06, + "loss": 0.0, + "step": 1700 + }, + { + "epoch": 1.6883950617283952, + "grad_norm": 8.762006473261863e-05, + "learning_rate": 3.5793373875878806e-06, + "loss": 0.0, + "step": 1710 + }, + { + "epoch": 1.6982716049382716, + "grad_norm": 8.157498814398423e-05, + "learning_rate": 3.3601554708112713e-06, + "loss": 0.0, + "step": 1720 + }, + { + "epoch": 1.7081481481481482, + "grad_norm": 7.945331162773073e-05, + "learning_rate": 3.14741410792353e-06, + "loss": 0.0, + "step": 1730 + }, + { + "epoch": 1.7180246913580248, + "grad_norm": 8.328018884640187e-05, + "learning_rate": 2.941176616025215e-06, + "loss": 0.0006, + "step": 1740 + }, + { + "epoch": 1.7279012345679012, + "grad_norm": 8.189202344510704e-05, + "learning_rate": 2.741504376503737e-06, + "loss": 0.0, + "step": 1750 + }, + { + "epoch": 1.7377777777777776, + "grad_norm": 7.249087502714247e-05, + "learning_rate": 2.5484568167647245e-06, + "loss": 0.0, + "step": 1760 + }, + { + "epoch": 1.7476543209876543, + "grad_norm": 7.910580461611971e-05, + "learning_rate": 2.362091392544985e-06, + "loss": 0.0, + "step": 1770 + }, + { + "epoch": 1.757530864197531, + "grad_norm": 7.526679837610573e-05, + "learning_rate": 2.1824635708122267e-06, + "loss": 0.0, + "step": 1780 + }, + { + "epoch": 1.7674074074074073, + "grad_norm": 9.244989632861689e-05, + "learning_rate": 2.0096268132567183e-06, + "loss": 0.0, + "step": 1790 + }, + { + "epoch": 1.777283950617284, + "grad_norm": 7.878772157710046e-05, + "learning_rate": 1.843632560379785e-06, + "loss": 0.0, + "step": 1800 + }, + { + "epoch": 1.7871604938271606, + "grad_norm": 8.294432336697355e-05, + "learning_rate": 1.684530216183805e-06, + "loss": 0.0002, + "step": 1810 + }, + { + "epoch": 1.797037037037037, + "grad_norm": 8.14785817055963e-05, + "learning_rate": 1.5323671334684042e-06, + "loss": 0.0, + "step": 1820 + }, + { + "epoch": 1.8069135802469136, + "grad_norm": 7.684365846216679e-05, + "learning_rate": 1.3871885997370464e-06, + "loss": 0.0, + "step": 1830 + }, + { + "epoch": 1.8167901234567903, + "grad_norm": 7.302551966859028e-05, + "learning_rate": 1.2490378237183658e-06, + "loss": 0.0, + "step": 1840 + }, + { + "epoch": 1.8266666666666667, + "grad_norm": 8.272424020105973e-05, + "learning_rate": 1.1179559225061809e-06, + "loss": 0.0, + "step": 1850 + }, + { + "epoch": 1.836543209876543, + "grad_norm": 8.503070421284065e-05, + "learning_rate": 9.93981909322031e-07, + "loss": 0.0, + "step": 1860 + }, + { + "epoch": 1.8464197530864197, + "grad_norm": 9.047168714459985e-05, + "learning_rate": 8.771526819038644e-07, + "loss": 0.0, + "step": 1870 + }, + { + "epoch": 1.8562962962962963, + "grad_norm": 7.630702020833269e-05, + "learning_rate": 7.675030115243676e-07, + "loss": 0.0, + "step": 1880 + }, + { + "epoch": 1.8661728395061727, + "grad_norm": 8.353806333616376e-05, + "learning_rate": 6.650655326421646e-07, + "loss": 0.0, + "step": 1890 + }, + { + "epoch": 1.8760493827160494, + "grad_norm": 8.572315709898248e-05, + "learning_rate": 5.698707331890013e-07, + "loss": 0.0, + "step": 1900 + }, + { + "epoch": 1.885925925925926, + "grad_norm": 8.084969158517197e-05, + "learning_rate": 4.819469454957787e-07, + "loss": 0.0, + "step": 1910 + }, + { + "epoch": 1.8958024691358024, + "grad_norm": 0.17167682945728302, + "learning_rate": 4.013203378601449e-07, + "loss": 0.0028, + "step": 1920 + }, + { + "epoch": 1.905679012345679, + "grad_norm": 0.00012432083894964308, + "learning_rate": 3.2801490675817283e-07, + "loss": 0.0, + "step": 1930 + }, + { + "epoch": 1.9155555555555557, + "grad_norm": 8.875240746419877e-05, + "learning_rate": 2.6205246970239514e-07, + "loss": 0.0, + "step": 1940 + }, + { + "epoch": 1.925432098765432, + "grad_norm": 8.457344665657729e-05, + "learning_rate": 2.0345265874839593e-07, + "loss": 0.0, + "step": 1950 + }, + { + "epoch": 1.9353086419753085, + "grad_norm": 0.00028203128022141755, + "learning_rate": 1.522329146518009e-07, + "loss": 0.0, + "step": 1960 + }, + { + "epoch": 1.9451851851851854, + "grad_norm": 0.00013342279999051243, + "learning_rate": 1.0840848167749385e-07, + "loss": 0.0, + "step": 1970 + }, + { + "epoch": 1.9550617283950618, + "grad_norm": 8.94103359314613e-05, + "learning_rate": 7.199240306254296e-08, + "loss": 0.0, + "step": 1980 + }, + { + "epoch": 1.9649382716049382, + "grad_norm": 7.630888285348192e-05, + "learning_rate": 4.299551713420058e-08, + "loss": 0.0, + "step": 1990 + }, + { + "epoch": 1.9748148148148148, + "grad_norm": 7.469132106052712e-05, + "learning_rate": 2.1426454084153003e-08, + "loss": 0.0, + "step": 2000 + }, + { + "epoch": 1.9748148148148148, + "eval_nlpcc25_task1_dev_accuracy": 0.996525974025974, + "eval_nlpcc25_task1_dev_loss": 0.021287396550178528, + "eval_nlpcc25_task1_dev_runtime": 3451.6242, + "eval_nlpcc25_task1_dev_samples_per_second": 0.811, + "eval_nlpcc25_task1_dev_steps_per_second": 0.203, + "step": 2000 + }, + { + "epoch": 1.9846913580246914, + "grad_norm": 0.00013060342462267727, + "learning_rate": 7.2916333999722975e-09, + "loss": 0.0, + "step": 2010 + }, + { + "epoch": 1.9945679012345678, + "grad_norm": 7.544880645582452e-05, + "learning_rate": 5.952619545002147e-10, + "loss": 0.0, + "step": 2020 + }, + { + "epoch": 1.9985185185185186, + "step": 2024, + "total_flos": 1.6877866202879754e+18, + "train_loss": 0.023970503359357204, + "train_runtime": 210819.9785, + "train_samples_per_second": 0.307, + "train_steps_per_second": 0.01 + } + ], + "logging_steps": 10, + "max_steps": 2024, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6877866202879754e+18, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +} diff --git a/Llama-3.1-8B-Instruct-lora-2/training_args.bin b/Llama-3.1-8B-Instruct-lora-2/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dca977cd6736713fb5f4fb68d38053f8e5f452eb --- /dev/null +++ b/Llama-3.1-8B-Instruct-lora-2/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95434493472159610d4c2dd6dc378ebbfd2ed878eb7c793a26ac8ce723b446c0 +size 5752 diff --git a/Llama-3.1-8B-Instruct-lora-2/training_loss.png b/Llama-3.1-8B-Instruct-lora-2/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..c4f3704fea0286dbca7a5254e14428c44a69577a Binary files /dev/null and b/Llama-3.1-8B-Instruct-lora-2/training_loss.png differ