ParagonLight committed on
Commit
90d0181
·
1 Parent(s): de3815a

update lora adapters

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. llama3_8b_peft/linguistics_puzzles/README.md +69 -0
  2. llama3_8b_peft/linguistics_puzzles/adapter_config.json +34 -0
  3. llama3_8b_peft/linguistics_puzzles/adapter_model.safetensors +3 -0
  4. llama3_8b_peft/linguistics_puzzles/all_results.json +12 -0
  5. llama3_8b_peft/linguistics_puzzles/eval_results.json +7 -0
  6. llama3_8b_peft/linguistics_puzzles/special_tokens_map.json +17 -0
  7. llama3_8b_peft/linguistics_puzzles/tokenizer.json +0 -0
  8. llama3_8b_peft/linguistics_puzzles/tokenizer_config.json +2065 -0
  9. llama3_8b_peft/linguistics_puzzles/train_results.json +8 -0
  10. llama3_8b_peft/linguistics_puzzles/trainer_log.jsonl +91 -0
  11. llama3_8b_peft/linguistics_puzzles/trainer_state.json +657 -0
  12. llama3_8b_peft/linguistics_puzzles/training_args.bin +3 -0
  13. llama3_8b_peft/linguistics_puzzles/training_eval_loss.png +0 -0
  14. llama3_8b_peft/linguistics_puzzles/training_loss.png +0 -0
  15. llama3_8b_peft/news_commentary_it/README.md +85 -0
  16. llama3_8b_peft/news_commentary_it/adapter_config.json +34 -0
  17. llama3_8b_peft/news_commentary_it/adapter_model.safetensors +3 -0
  18. llama3_8b_peft/news_commentary_it/all_results.json +12 -0
  19. llama3_8b_peft/news_commentary_it/eval_results.json +7 -0
  20. llama3_8b_peft/news_commentary_it/special_tokens_map.json +17 -0
  21. llama3_8b_peft/news_commentary_it/tokenizer.json +0 -0
  22. llama3_8b_peft/news_commentary_it/tokenizer_config.json +2065 -0
  23. llama3_8b_peft/news_commentary_it/train_results.json +8 -0
  24. llama3_8b_peft/news_commentary_it/trainer_log.jsonl +0 -0
  25. llama3_8b_peft/news_commentary_it/trainer_state.json +2990 -0
  26. llama3_8b_peft/news_commentary_it/training_args.bin +3 -0
  27. llama3_8b_peft/news_commentary_it/training_eval_loss.png +0 -0
  28. llama3_8b_peft/news_commentary_it/training_loss.png +0 -0
  29. llama3_8b_peft/topical_chat/README.md +88 -0
  30. llama3_8b_peft/topical_chat/adapter_config.json +34 -0
  31. llama3_8b_peft/topical_chat/adapter_model.safetensors +3 -0
  32. llama3_8b_peft/topical_chat/all_results.json +12 -0
  33. llama3_8b_peft/topical_chat/eval_results.json +7 -0
  34. llama3_8b_peft/topical_chat/special_tokens_map.json +17 -0
  35. llama3_8b_peft/topical_chat/tokenizer.json +0 -0
  36. llama3_8b_peft/topical_chat/tokenizer_config.json +2065 -0
  37. llama3_8b_peft/topical_chat/train_results.json +8 -0
  38. llama3_8b_peft/topical_chat/trainer_log.jsonl +0 -0
  39. llama3_8b_peft/topical_chat/trainer_state.json +3434 -0
  40. llama3_8b_peft/topical_chat/training_args.bin +3 -0
  41. llama3_8b_peft/topical_chat/training_eval_loss.png +0 -0
  42. llama3_8b_peft/topical_chat/training_loss.png +0 -0
  43. llama3_8b_peft/unit_conversion/README.md +90 -0
  44. llama3_8b_peft/unit_conversion/adapter_config.json +34 -0
  45. llama3_8b_peft/unit_conversion/adapter_model.safetensors +3 -0
  46. llama3_8b_peft/unit_conversion/all_results.json +12 -0
  47. llama3_8b_peft/unit_conversion/eval_results.json +7 -0
  48. llama3_8b_peft/unit_conversion/special_tokens_map.json +17 -0
  49. llama3_8b_peft/unit_conversion/tokenizer.json +0 -0
  50. llama3_8b_peft/unit_conversion/tokenizer_config.json +2065 -0
llama3_8b_peft/linguistics_puzzles/README.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: linguistics_puzzles_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # linguistics_puzzles_no_sys
18
+
19
+ This model is a fine-tuned version of the Llama3-8b base model (loaded from the local path `/data1/model/llama3/unsloth/Llama3-8b`) on the linguistics_puzzles_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.7005
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 1e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 2
46
+ - total_train_batch_size: 16
47
+ - total_eval_batch_size: 16
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: cosine
50
+ - lr_scheduler_warmup_steps: 20
51
+ - num_epochs: 10.0
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss |
56
+ |:-------------:|:------:|:----:|:---------------:|
57
+ | 0.8807 | 2.3529 | 200 | 0.8920 |
58
+ | 0.6007 | 4.7059 | 400 | 0.7229 |
59
+ | 0.5116 | 7.0588 | 600 | 0.6910 |
60
+ | 0.446 | 9.4118 | 800 | 0.7020 |
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - PEFT 0.10.0
66
+ - Transformers 4.40.0
67
+ - Pytorch 2.2.1
68
+ - Datasets 2.18.0
69
+ - Tokenizers 0.19.1
llama3_8b_peft/linguistics_puzzles/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "o_proj",
24
+ "down_proj",
25
+ "gate_proj",
26
+ "q_proj",
27
+ "v_proj",
28
+ "k_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/linguistics_puzzles/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9bbcbc5cd8ea25aefada7d5b953e8ea51a139debf55b0f74e2e5ed4e094c6d0
3
+ size 83945296
llama3_8b_peft/linguistics_puzzles/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_loss": 0.7005233764648438,
4
+ "eval_runtime": 3.2355,
5
+ "eval_samples_per_second": 74.177,
6
+ "eval_steps_per_second": 4.636,
7
+ "total_flos": 1.965855597354025e+17,
8
+ "train_loss": 0.7394972818038043,
9
+ "train_runtime": 623.5699,
10
+ "train_samples_per_second": 21.81,
11
+ "train_steps_per_second": 1.363
12
+ }
llama3_8b_peft/linguistics_puzzles/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_loss": 0.7005233764648438,
4
+ "eval_runtime": 3.2355,
5
+ "eval_samples_per_second": 74.177,
6
+ "eval_steps_per_second": 4.636
7
+ }
llama3_8b_peft/linguistics_puzzles/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/linguistics_puzzles/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/linguistics_puzzles/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }
llama3_8b_peft/linguistics_puzzles/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "total_flos": 1.965855597354025e+17,
4
+ "train_loss": 0.7394972818038043,
5
+ "train_runtime": 623.5699,
6
+ "train_samples_per_second": 21.81,
7
+ "train_steps_per_second": 1.363
8
+ }
llama3_8b_peft/linguistics_puzzles/trainer_log.jsonl ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 850, "loss": 2.1604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-06, "epoch": 0.11764705882352941, "percentage": 1.18, "elapsed_time": "0:00:08", "remaining_time": "0:12:20"}
2
+ {"current_steps": 20, "total_steps": 850, "loss": 2.0002, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1e-05, "epoch": 0.23529411764705882, "percentage": 2.35, "elapsed_time": "0:00:15", "remaining_time": "0:10:52"}
3
+ {"current_steps": 30, "total_steps": 850, "loss": 1.9701, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.996418774081658e-06, "epoch": 0.35294117647058826, "percentage": 3.53, "elapsed_time": "0:00:22", "remaining_time": "0:10:20"}
4
+ {"current_steps": 40, "total_steps": 850, "loss": 1.7148, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.985680226398261e-06, "epoch": 0.47058823529411764, "percentage": 4.71, "elapsed_time": "0:00:29", "remaining_time": "0:10:02"}
5
+ {"current_steps": 50, "total_steps": 850, "loss": 1.5345, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.967799739815925e-06, "epoch": 0.5882352941176471, "percentage": 5.88, "elapsed_time": "0:00:37", "remaining_time": "0:09:52"}
6
+ {"current_steps": 60, "total_steps": 850, "loss": 1.4347, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.942802927959444e-06, "epoch": 0.7058823529411765, "percentage": 7.06, "elapsed_time": "0:00:43", "remaining_time": "0:09:38"}
7
+ {"current_steps": 70, "total_steps": 850, "loss": 1.343, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.910725598521014e-06, "epoch": 0.8235294117647058, "percentage": 8.24, "elapsed_time": "0:00:50", "remaining_time": "0:09:27"}
8
+ {"current_steps": 80, "total_steps": 850, "loss": 1.3124, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.871613701966067e-06, "epoch": 0.9411764705882353, "percentage": 9.41, "elapsed_time": "0:00:57", "remaining_time": "0:09:18"}
9
+ {"current_steps": 90, "total_steps": 850, "loss": 1.2879, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.825523265709667e-06, "epoch": 1.0588235294117647, "percentage": 10.59, "elapsed_time": "0:01:05", "remaining_time": "0:09:09"}
10
+ {"current_steps": 100, "total_steps": 850, "loss": 1.2171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.772520313857777e-06, "epoch": 1.1764705882352942, "percentage": 11.76, "elapsed_time": "0:01:12", "remaining_time": "0:09:00"}
11
+ {"current_steps": 110, "total_steps": 850, "loss": 1.0736, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.712680772628365e-06, "epoch": 1.2941176470588236, "percentage": 12.94, "elapsed_time": "0:01:18", "remaining_time": "0:08:50"}
12
+ {"current_steps": 120, "total_steps": 850, "loss": 1.1098, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.646090361587828e-06, "epoch": 1.4117647058823528, "percentage": 14.12, "elapsed_time": "0:01:26", "remaining_time": "0:08:44"}
13
+ {"current_steps": 130, "total_steps": 850, "loss": 1.0705, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.572844470858537e-06, "epoch": 1.5294117647058822, "percentage": 15.29, "elapsed_time": "0:01:33", "remaining_time": "0:08:36"}
14
+ {"current_steps": 140, "total_steps": 850, "loss": 1.0728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.493048024473413e-06, "epoch": 1.6470588235294117, "percentage": 16.47, "elapsed_time": "0:01:40", "remaining_time": "0:08:29"}
15
+ {"current_steps": 150, "total_steps": 850, "loss": 1.0956, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.406815330073244e-06, "epoch": 1.7647058823529411, "percentage": 17.65, "elapsed_time": "0:01:47", "remaining_time": "0:08:21"}
16
+ {"current_steps": 160, "total_steps": 850, "loss": 1.0212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.314269915162115e-06, "epoch": 1.8823529411764706, "percentage": 18.82, "elapsed_time": "0:01:54", "remaining_time": "0:08:12"}
17
+ {"current_steps": 170, "total_steps": 850, "loss": 0.9746, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.215544350155423e-06, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:02:01", "remaining_time": "0:08:05"}
18
+ {"current_steps": 180, "total_steps": 850, "loss": 0.9111, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.110780058474052e-06, "epoch": 2.1176470588235294, "percentage": 21.18, "elapsed_time": "0:02:08", "remaining_time": "0:07:58"}
19
+ {"current_steps": 190, "total_steps": 850, "loss": 0.8674, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.000127113956673e-06, "epoch": 2.235294117647059, "percentage": 22.35, "elapsed_time": "0:02:15", "remaining_time": "0:07:51"}
20
+ {"current_steps": 200, "total_steps": 850, "loss": 0.8807, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.883744025880429e-06, "epoch": 2.3529411764705883, "percentage": 23.53, "elapsed_time": "0:02:22", "remaining_time": "0:07:44"}
21
+ {"current_steps": 200, "total_steps": 850, "loss": null, "eval_loss": 0.8919932246208191, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.3529411764705883, "percentage": 23.53, "elapsed_time": "0:02:22", "remaining_time": "0:07:44"}
22
+ {"current_steps": 210, "total_steps": 850, "loss": 0.8804, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.761797511897907e-06, "epoch": 2.4705882352941178, "percentage": 24.71, "elapsed_time": "0:02:33", "remaining_time": "0:07:47"}
23
+ {"current_steps": 220, "total_steps": 850, "loss": 0.8961, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.634462259215719e-06, "epoch": 2.588235294117647, "percentage": 25.88, "elapsed_time": "0:02:40", "remaining_time": "0:07:39"}
24
+ {"current_steps": 230, "total_steps": 850, "loss": 0.8568, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.501920674356755e-06, "epoch": 2.7058823529411766, "percentage": 27.06, "elapsed_time": "0:02:47", "remaining_time": "0:07:31"}
25
+ {"current_steps": 240, "total_steps": 850, "loss": 0.8229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.364362621864595e-06, "epoch": 2.8235294117647056, "percentage": 28.24, "elapsed_time": "0:02:54", "remaining_time": "0:07:23"}
26
+ {"current_steps": 250, "total_steps": 850, "loss": 0.8681, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.221985152324385e-06, "epoch": 2.9411764705882355, "percentage": 29.41, "elapsed_time": "0:03:01", "remaining_time": "0:07:15"}
27
+ {"current_steps": 260, "total_steps": 850, "loss": 0.7417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.07499222008977e-06, "epoch": 3.0588235294117645, "percentage": 30.59, "elapsed_time": "0:03:08", "remaining_time": "0:07:08"}
28
+ {"current_steps": 270, "total_steps": 850, "loss": 0.7108, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.923594391120237e-06, "epoch": 3.176470588235294, "percentage": 31.76, "elapsed_time": "0:03:15", "remaining_time": "0:07:00"}
29
+ {"current_steps": 280, "total_steps": 850, "loss": 0.6948, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.768008541347423e-06, "epoch": 3.2941176470588234, "percentage": 32.94, "elapsed_time": "0:03:22", "remaining_time": "0:06:52"}
30
+ {"current_steps": 290, "total_steps": 850, "loss": 0.7723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.608457546002423e-06, "epoch": 3.411764705882353, "percentage": 34.12, "elapsed_time": "0:03:29", "remaining_time": "0:06:45"}
31
+ {"current_steps": 300, "total_steps": 850, "loss": 0.7667, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.445169960349167e-06, "epoch": 3.5294117647058822, "percentage": 35.29, "elapsed_time": "0:03:36", "remaining_time": "0:06:37"}
32
+ {"current_steps": 310, "total_steps": 850, "loss": 0.7154, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.278379692281209e-06, "epoch": 3.6470588235294117, "percentage": 36.47, "elapsed_time": "0:03:44", "remaining_time": "0:06:30"}
33
+ {"current_steps": 320, "total_steps": 850, "loss": 0.7541, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.10832566725092e-06, "epoch": 3.764705882352941, "percentage": 37.65, "elapsed_time": "0:03:51", "remaining_time": "0:06:22"}
34
+ {"current_steps": 330, "total_steps": 850, "loss": 0.6812, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.9352514860110876e-06, "epoch": 3.8823529411764706, "percentage": 38.82, "elapsed_time": "0:03:58", "remaining_time": "0:06:15"}
35
+ {"current_steps": 340, "total_steps": 850, "loss": 0.718, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.759405075659165e-06, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:04:05", "remaining_time": "0:06:08"}
36
+ {"current_steps": 350, "total_steps": 850, "loss": 0.6134, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.58103833448412e-06, "epoch": 4.117647058823529, "percentage": 41.18, "elapsed_time": "0:04:12", "remaining_time": "0:06:00"}
37
+ {"current_steps": 360, "total_steps": 850, "loss": 0.6276, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.4004067711245366e-06, "epoch": 4.235294117647059, "percentage": 42.35, "elapsed_time": "0:04:19", "remaining_time": "0:05:52"}
38
+ {"current_steps": 370, "total_steps": 850, "loss": 0.639, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.2177691385549595e-06, "epoch": 4.352941176470588, "percentage": 43.53, "elapsed_time": "0:04:26", "remaining_time": "0:05:45"}
39
+ {"current_steps": 380, "total_steps": 850, "loss": 0.6053, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.033387063424765e-06, "epoch": 4.470588235294118, "percentage": 44.71, "elapsed_time": "0:04:33", "remaining_time": "0:05:38"}
40
+ {"current_steps": 390, "total_steps": 850, "loss": 0.6622, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.8475246712804845e-06, "epoch": 4.588235294117647, "percentage": 45.88, "elapsed_time": "0:04:40", "remaining_time": "0:05:30"}
41
+ {"current_steps": 400, "total_steps": 850, "loss": 0.6007, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.660448208208513e-06, "epoch": 4.705882352941177, "percentage": 47.06, "elapsed_time": "0:04:47", "remaining_time": "0:05:23"}
42
+ {"current_steps": 400, "total_steps": 850, "loss": null, "eval_loss": 0.7228670716285706, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.705882352941177, "percentage": 47.06, "elapsed_time": "0:04:47", "remaining_time": "0:05:23"}
43
+ {"current_steps": 410, "total_steps": 850, "loss": 0.6278, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.472425659440157e-06, "epoch": 4.823529411764706, "percentage": 48.24, "elapsed_time": "0:04:57", "remaining_time": "0:05:19"}
44
+ {"current_steps": 420, "total_steps": 850, "loss": 0.5962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.2837263654653715e-06, "epoch": 4.9411764705882355, "percentage": 49.41, "elapsed_time": "0:05:05", "remaining_time": "0:05:12"}
45
+ {"current_steps": 430, "total_steps": 850, "loss": 0.6128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.094620636205096e-06, "epoch": 5.0588235294117645, "percentage": 50.59, "elapsed_time": "0:05:12", "remaining_time": "0:05:04"}
46
+ {"current_steps": 440, "total_steps": 850, "loss": 0.5379, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.905379363794907e-06, "epoch": 5.176470588235294, "percentage": 51.76, "elapsed_time": "0:05:19", "remaining_time": "0:04:57"}
47
+ {"current_steps": 450, "total_steps": 850, "loss": 0.5426, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.71627363453463e-06, "epoch": 5.294117647058823, "percentage": 52.94, "elapsed_time": "0:05:26", "remaining_time": "0:04:50"}
48
+ {"current_steps": 460, "total_steps": 850, "loss": 0.5471, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.527574340559844e-06, "epoch": 5.411764705882353, "percentage": 54.12, "elapsed_time": "0:05:33", "remaining_time": "0:04:42"}
49
+ {"current_steps": 470, "total_steps": 850, "loss": 0.5618, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.33955179179149e-06, "epoch": 5.529411764705882, "percentage": 55.29, "elapsed_time": "0:05:40", "remaining_time": "0:04:35"}
50
+ {"current_steps": 480, "total_steps": 850, "loss": 0.5939, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.152475328719517e-06, "epoch": 5.647058823529412, "percentage": 56.47, "elapsed_time": "0:05:47", "remaining_time": "0:04:27"}
51
+ {"current_steps": 490, "total_steps": 850, "loss": 0.5709, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.966612936575235e-06, "epoch": 5.764705882352941, "percentage": 57.65, "elapsed_time": "0:05:54", "remaining_time": "0:04:20"}
52
+ {"current_steps": 500, "total_steps": 850, "loss": 0.5465, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.782230861445041e-06, "epoch": 5.882352941176471, "percentage": 58.82, "elapsed_time": "0:06:02", "remaining_time": "0:04:13"}
53
+ {"current_steps": 510, "total_steps": 850, "loss": 0.5533, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5995932288754655e-06, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:06:09", "remaining_time": "0:04:06"}
54
+ {"current_steps": 520, "total_steps": 850, "loss": 0.4908, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.4189616655158803e-06, "epoch": 6.117647058823529, "percentage": 61.18, "elapsed_time": "0:06:16", "remaining_time": "0:03:58"}
55
+ {"current_steps": 530, "total_steps": 850, "loss": 0.4787, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.240594924340835e-06, "epoch": 6.235294117647059, "percentage": 62.35, "elapsed_time": "0:06:23", "remaining_time": "0:03:51"}
56
+ {"current_steps": 540, "total_steps": 850, "loss": 0.529, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0647485139889145e-06, "epoch": 6.352941176470588, "percentage": 63.53, "elapsed_time": "0:06:30", "remaining_time": "0:03:44"}
57
+ {"current_steps": 550, "total_steps": 850, "loss": 0.4941, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.89167433274908e-06, "epoch": 6.470588235294118, "percentage": 64.71, "elapsed_time": "0:06:37", "remaining_time": "0:03:36"}
58
+ {"current_steps": 560, "total_steps": 850, "loss": 0.4572, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.721620307718793e-06, "epoch": 6.588235294117647, "percentage": 65.88, "elapsed_time": "0:06:44", "remaining_time": "0:03:29"}
59
+ {"current_steps": 570, "total_steps": 850, "loss": 0.5448, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.554830039650834e-06, "epoch": 6.705882352941177, "percentage": 67.06, "elapsed_time": "0:06:51", "remaining_time": "0:03:22"}
60
+ {"current_steps": 580, "total_steps": 850, "loss": 0.5508, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.391542453997578e-06, "epoch": 6.823529411764706, "percentage": 68.24, "elapsed_time": "0:06:58", "remaining_time": "0:03:14"}
61
+ {"current_steps": 590, "total_steps": 850, "loss": 0.4691, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.2319914586525776e-06, "epoch": 6.9411764705882355, "percentage": 69.41, "elapsed_time": "0:07:06", "remaining_time": "0:03:07"}
62
+ {"current_steps": 600, "total_steps": 850, "loss": 0.5116, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.0764056088797646e-06, "epoch": 7.0588235294117645, "percentage": 70.59, "elapsed_time": "0:07:13", "remaining_time": "0:03:00"}
63
+ {"current_steps": 600, "total_steps": 850, "loss": null, "eval_loss": 0.6910275816917419, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 7.0588235294117645, "percentage": 70.59, "elapsed_time": "0:07:13", "remaining_time": "0:03:00"}
64
+ {"current_steps": 610, "total_steps": 850, "loss": 0.4648, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.9250077799102323e-06, "epoch": 7.176470588235294, "percentage": 71.76, "elapsed_time": "0:07:23", "remaining_time": "0:02:54"}
65
+ {"current_steps": 620, "total_steps": 850, "loss": 0.4766, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.7780148476756148e-06, "epoch": 7.294117647058823, "percentage": 72.94, "elapsed_time": "0:07:30", "remaining_time": "0:02:47"}
66
+ {"current_steps": 630, "total_steps": 850, "loss": 0.4604, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.6356373781354058e-06, "epoch": 7.411764705882353, "percentage": 74.12, "elapsed_time": "0:07:37", "remaining_time": "0:02:39"}
67
+ {"current_steps": 640, "total_steps": 850, "loss": 0.4531, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.4980793256432474e-06, "epoch": 7.529411764705882, "percentage": 75.29, "elapsed_time": "0:07:44", "remaining_time": "0:02:32"}
68
+ {"current_steps": 650, "total_steps": 850, "loss": 0.496, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.3655377407842813e-06, "epoch": 7.647058823529412, "percentage": 76.47, "elapsed_time": "0:07:52", "remaining_time": "0:02:25"}
69
+ {"current_steps": 660, "total_steps": 850, "loss": 0.4421, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.2382024881020937e-06, "epoch": 7.764705882352941, "percentage": 77.65, "elapsed_time": "0:07:59", "remaining_time": "0:02:17"}
70
+ {"current_steps": 670, "total_steps": 850, "loss": 0.4582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.1162559741195733e-06, "epoch": 7.882352941176471, "percentage": 78.82, "elapsed_time": "0:08:06", "remaining_time": "0:02:10"}
71
+ {"current_steps": 680, "total_steps": 850, "loss": 0.4808, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.998728860433277e-07, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:08:13", "remaining_time": "0:02:03"}
72
+ {"current_steps": 690, "total_steps": 850, "loss": 0.4496, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.892199415259501e-07, "epoch": 8.117647058823529, "percentage": 81.18, "elapsed_time": "0:08:20", "remaining_time": "0:01:56"}
73
+ {"current_steps": 700, "total_steps": 850, "loss": 0.3856, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.844556498445788e-07, "epoch": 8.235294117647058, "percentage": 82.35, "elapsed_time": "0:08:27", "remaining_time": "0:01:48"}
74
+ {"current_steps": 710, "total_steps": 850, "loss": 0.4586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.857300848378857e-07, "epoch": 8.352941176470589, "percentage": 83.53, "elapsed_time": "0:08:34", "remaining_time": "0:01:41"}
75
+ {"current_steps": 720, "total_steps": 850, "loss": 0.4557, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.931846699267558e-07, "epoch": 8.470588235294118, "percentage": 84.71, "elapsed_time": "0:08:41", "remaining_time": "0:01:34"}
76
+ {"current_steps": 730, "total_steps": 850, "loss": 0.4591, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.0695197552659e-07, "epoch": 8.588235294117647, "percentage": 85.88, "elapsed_time": "0:08:48", "remaining_time": "0:01:26"}
77
+ {"current_steps": 740, "total_steps": 850, "loss": 0.4422, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.271555291414636e-07, "epoch": 8.705882352941176, "percentage": 87.06, "elapsed_time": "0:08:56", "remaining_time": "0:01:19"}
78
+ {"current_steps": 750, "total_steps": 850, "loss": 0.4336, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.539096384121743e-07, "epoch": 8.823529411764707, "percentage": 88.24, "elapsed_time": "0:09:02", "remaining_time": "0:01:12"}
79
+ {"current_steps": 760, "total_steps": 850, "loss": 0.4929, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.873192273716369e-07, "epoch": 8.941176470588236, "percentage": 89.41, "elapsed_time": "0:09:10", "remaining_time": "0:01:05"}
80
+ {"current_steps": 770, "total_steps": 850, "loss": 0.4903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.274796861422246e-07, "epoch": 9.058823529411764, "percentage": 90.59, "elapsed_time": "0:09:17", "remaining_time": "0:00:57"}
81
+ {"current_steps": 780, "total_steps": 850, "loss": 0.4236, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.7447673429033361e-07, "epoch": 9.176470588235293, "percentage": 91.76, "elapsed_time": "0:09:24", "remaining_time": "0:00:50"}
82
+ {"current_steps": 790, "total_steps": 850, "loss": 0.4297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.2838629803393343e-07, "epoch": 9.294117647058824, "percentage": 92.94, "elapsed_time": "0:09:31", "remaining_time": "0:00:43"}
83
+ {"current_steps": 800, "total_steps": 850, "loss": 0.446, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.927440147898703e-08, "epoch": 9.411764705882353, "percentage": 94.12, "elapsed_time": "0:09:38", "remaining_time": "0:00:36"}
84
+ {"current_steps": 800, "total_steps": 850, "loss": null, "eval_loss": 0.7020273804664612, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 9.411764705882353, "percentage": 94.12, "elapsed_time": "0:09:38", "remaining_time": "0:00:36"}
85
+ {"current_steps": 810, "total_steps": 850, "loss": 0.4399, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.7197072040557356e-08, "epoch": 9.529411764705882, "percentage": 95.29, "elapsed_time": "0:09:49", "remaining_time": "0:00:29"}
86
+ {"current_steps": 820, "total_steps": 850, "loss": 0.4143, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.220026018407541e-08, "epoch": 9.647058823529411, "percentage": 96.47, "elapsed_time": "0:09:56", "remaining_time": "0:00:21"}
87
+ {"current_steps": 830, "total_steps": 850, "loss": 0.4763, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1.431977360173975e-08, "epoch": 9.764705882352942, "percentage": 97.65, "elapsed_time": "0:10:03", "remaining_time": "0:00:14"}
88
+ {"current_steps": 840, "total_steps": 850, "loss": 0.4269, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5812259183426457e-09, "epoch": 9.882352941176471, "percentage": 98.82, "elapsed_time": "0:10:10", "remaining_time": "0:00:07"}
89
+ {"current_steps": 850, "total_steps": 850, "loss": 0.504, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 0.0, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:10:17", "remaining_time": "0:00:00"}
90
+ {"current_steps": 850, "total_steps": 850, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:10:17", "remaining_time": "0:00:00"}
91
+ {"current_steps": 15, "total_steps": 15, "loss": null, "eval_loss": 0.7005233764648438, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:10:20", "remaining_time": "0:00:00"}
llama3_8b_peft/linguistics_puzzles/trainer_state.json ADDED
@@ -0,0 +1,657 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "eval_steps": 200,
6
+ "global_step": 850,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11764705882352941,
13
+ "grad_norm": 4.299584865570068,
14
+ "learning_rate": 5e-06,
15
+ "loss": 2.1604,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.23529411764705882,
20
+ "grad_norm": 3.239166736602783,
21
+ "learning_rate": 1e-05,
22
+ "loss": 2.0002,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.35294117647058826,
27
+ "grad_norm": 2.765167236328125,
28
+ "learning_rate": 9.996418774081658e-06,
29
+ "loss": 1.9701,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.47058823529411764,
34
+ "grad_norm": 3.1078121662139893,
35
+ "learning_rate": 9.985680226398261e-06,
36
+ "loss": 1.7148,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.5882352941176471,
41
+ "grad_norm": 2.9932522773742676,
42
+ "learning_rate": 9.967799739815925e-06,
43
+ "loss": 1.5345,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.7058823529411765,
48
+ "grad_norm": 2.9176018238067627,
49
+ "learning_rate": 9.942802927959444e-06,
50
+ "loss": 1.4347,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.8235294117647058,
55
+ "grad_norm": 4.249242782592773,
56
+ "learning_rate": 9.910725598521014e-06,
57
+ "loss": 1.343,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.9411764705882353,
62
+ "grad_norm": 3.216294288635254,
63
+ "learning_rate": 9.871613701966067e-06,
64
+ "loss": 1.3124,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 1.0588235294117647,
69
+ "grad_norm": 4.646539211273193,
70
+ "learning_rate": 9.825523265709667e-06,
71
+ "loss": 1.2879,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 1.1764705882352942,
76
+ "grad_norm": 4.158232688903809,
77
+ "learning_rate": 9.772520313857777e-06,
78
+ "loss": 1.2171,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 1.2941176470588236,
83
+ "grad_norm": 4.180668354034424,
84
+ "learning_rate": 9.712680772628365e-06,
85
+ "loss": 1.0736,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 1.4117647058823528,
90
+ "grad_norm": 4.965755462646484,
91
+ "learning_rate": 9.646090361587828e-06,
92
+ "loss": 1.1098,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 1.5294117647058822,
97
+ "grad_norm": 4.774384498596191,
98
+ "learning_rate": 9.572844470858537e-06,
99
+ "loss": 1.0705,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 1.6470588235294117,
104
+ "grad_norm": 4.767484188079834,
105
+ "learning_rate": 9.493048024473413e-06,
106
+ "loss": 1.0728,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 1.7647058823529411,
111
+ "grad_norm": 5.4234395027160645,
112
+ "learning_rate": 9.406815330073244e-06,
113
+ "loss": 1.0956,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 1.8823529411764706,
118
+ "grad_norm": 5.280179500579834,
119
+ "learning_rate": 9.314269915162115e-06,
120
+ "loss": 1.0212,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 2.0,
125
+ "grad_norm": 4.913310527801514,
126
+ "learning_rate": 9.215544350155423e-06,
127
+ "loss": 0.9746,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 2.1176470588235294,
132
+ "grad_norm": 4.672488689422607,
133
+ "learning_rate": 9.110780058474052e-06,
134
+ "loss": 0.9111,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 2.235294117647059,
139
+ "grad_norm": 5.0370073318481445,
140
+ "learning_rate": 9.000127113956673e-06,
141
+ "loss": 0.8674,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 2.3529411764705883,
146
+ "grad_norm": 6.736476898193359,
147
+ "learning_rate": 8.883744025880429e-06,
148
+ "loss": 0.8807,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 2.3529411764705883,
153
+ "eval_loss": 0.8919932246208191,
154
+ "eval_runtime": 3.2499,
155
+ "eval_samples_per_second": 73.849,
156
+ "eval_steps_per_second": 4.616,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 2.4705882352941178,
161
+ "grad_norm": 4.640665054321289,
162
+ "learning_rate": 8.761797511897907e-06,
163
+ "loss": 0.8804,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 2.588235294117647,
168
+ "grad_norm": 5.618963718414307,
169
+ "learning_rate": 8.634462259215719e-06,
170
+ "loss": 0.8961,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 2.7058823529411766,
175
+ "grad_norm": 6.179649353027344,
176
+ "learning_rate": 8.501920674356755e-06,
177
+ "loss": 0.8568,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 2.8235294117647056,
182
+ "grad_norm": 5.1857099533081055,
183
+ "learning_rate": 8.364362621864595e-06,
184
+ "loss": 0.8229,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 2.9411764705882355,
189
+ "grad_norm": 6.933017730712891,
190
+ "learning_rate": 8.221985152324385e-06,
191
+ "loss": 0.8681,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 3.0588235294117645,
196
+ "grad_norm": 6.753420829772949,
197
+ "learning_rate": 8.07499222008977e-06,
198
+ "loss": 0.7417,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 3.176470588235294,
203
+ "grad_norm": 7.1421685218811035,
204
+ "learning_rate": 7.923594391120237e-06,
205
+ "loss": 0.7108,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 3.2941176470588234,
210
+ "grad_norm": 6.694092750549316,
211
+ "learning_rate": 7.768008541347423e-06,
212
+ "loss": 0.6948,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 3.411764705882353,
217
+ "grad_norm": 6.256473064422607,
218
+ "learning_rate": 7.608457546002423e-06,
219
+ "loss": 0.7723,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 3.5294117647058822,
224
+ "grad_norm": 5.536714553833008,
225
+ "learning_rate": 7.445169960349167e-06,
226
+ "loss": 0.7667,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 3.6470588235294117,
231
+ "grad_norm": 6.74088716506958,
232
+ "learning_rate": 7.278379692281209e-06,
233
+ "loss": 0.7154,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 3.764705882352941,
238
+ "grad_norm": 7.156430244445801,
239
+ "learning_rate": 7.10832566725092e-06,
240
+ "loss": 0.7541,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 3.8823529411764706,
245
+ "grad_norm": 7.729401588439941,
246
+ "learning_rate": 6.9352514860110876e-06,
247
+ "loss": 0.6812,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 4.0,
252
+ "grad_norm": 7.48971700668335,
253
+ "learning_rate": 6.759405075659165e-06,
254
+ "loss": 0.718,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 4.117647058823529,
259
+ "grad_norm": 5.8114705085754395,
260
+ "learning_rate": 6.58103833448412e-06,
261
+ "loss": 0.6134,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 4.235294117647059,
266
+ "grad_norm": 9.239020347595215,
267
+ "learning_rate": 6.4004067711245366e-06,
268
+ "loss": 0.6276,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 4.352941176470588,
273
+ "grad_norm": 7.040438652038574,
274
+ "learning_rate": 6.2177691385549595e-06,
275
+ "loss": 0.639,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 4.470588235294118,
280
+ "grad_norm": 7.773618698120117,
281
+ "learning_rate": 6.033387063424765e-06,
282
+ "loss": 0.6053,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 4.588235294117647,
287
+ "grad_norm": 6.618800163269043,
288
+ "learning_rate": 5.8475246712804845e-06,
289
+ "loss": 0.6622,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 4.705882352941177,
294
+ "grad_norm": 5.934289455413818,
295
+ "learning_rate": 5.660448208208513e-06,
296
+ "loss": 0.6007,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 4.705882352941177,
301
+ "eval_loss": 0.7228670716285706,
302
+ "eval_runtime": 3.2496,
303
+ "eval_samples_per_second": 73.856,
304
+ "eval_steps_per_second": 4.616,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 4.823529411764706,
309
+ "grad_norm": 7.17456579208374,
310
+ "learning_rate": 5.472425659440157e-06,
311
+ "loss": 0.6278,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 4.9411764705882355,
316
+ "grad_norm": 7.006382465362549,
317
+ "learning_rate": 5.2837263654653715e-06,
318
+ "loss": 0.5962,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 5.0588235294117645,
323
+ "grad_norm": 6.821316242218018,
324
+ "learning_rate": 5.094620636205096e-06,
325
+ "loss": 0.6128,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 5.176470588235294,
330
+ "grad_norm": 6.48532247543335,
331
+ "learning_rate": 4.905379363794907e-06,
332
+ "loss": 0.5379,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 5.294117647058823,
337
+ "grad_norm": 7.251594543457031,
338
+ "learning_rate": 4.71627363453463e-06,
339
+ "loss": 0.5426,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 5.411764705882353,
344
+ "grad_norm": 6.321185111999512,
345
+ "learning_rate": 4.527574340559844e-06,
346
+ "loss": 0.5471,
347
+ "step": 460
348
+ },
349
+ {
350
+ "epoch": 5.529411764705882,
351
+ "grad_norm": 9.023582458496094,
352
+ "learning_rate": 4.33955179179149e-06,
353
+ "loss": 0.5618,
354
+ "step": 470
355
+ },
356
+ {
357
+ "epoch": 5.647058823529412,
358
+ "grad_norm": 8.510845184326172,
359
+ "learning_rate": 4.152475328719517e-06,
360
+ "loss": 0.5939,
361
+ "step": 480
362
+ },
363
+ {
364
+ "epoch": 5.764705882352941,
365
+ "grad_norm": 7.750530242919922,
366
+ "learning_rate": 3.966612936575235e-06,
367
+ "loss": 0.5709,
368
+ "step": 490
369
+ },
370
+ {
371
+ "epoch": 5.882352941176471,
372
+ "grad_norm": 6.927280426025391,
373
+ "learning_rate": 3.782230861445041e-06,
374
+ "loss": 0.5465,
375
+ "step": 500
376
+ },
377
+ {
378
+ "epoch": 6.0,
379
+ "grad_norm": 7.671702861785889,
380
+ "learning_rate": 3.5995932288754655e-06,
381
+ "loss": 0.5533,
382
+ "step": 510
383
+ },
384
+ {
385
+ "epoch": 6.117647058823529,
386
+ "grad_norm": 6.111567974090576,
387
+ "learning_rate": 3.4189616655158803e-06,
388
+ "loss": 0.4908,
389
+ "step": 520
390
+ },
391
+ {
392
+ "epoch": 6.235294117647059,
393
+ "grad_norm": 6.9791259765625,
394
+ "learning_rate": 3.240594924340835e-06,
395
+ "loss": 0.4787,
396
+ "step": 530
397
+ },
398
+ {
399
+ "epoch": 6.352941176470588,
400
+ "grad_norm": 8.243158340454102,
401
+ "learning_rate": 3.0647485139889145e-06,
402
+ "loss": 0.529,
403
+ "step": 540
404
+ },
405
+ {
406
+ "epoch": 6.470588235294118,
407
+ "grad_norm": 7.592572212219238,
408
+ "learning_rate": 2.89167433274908e-06,
409
+ "loss": 0.4941,
410
+ "step": 550
411
+ },
412
+ {
413
+ "epoch": 6.588235294117647,
414
+ "grad_norm": 8.381823539733887,
415
+ "learning_rate": 2.721620307718793e-06,
416
+ "loss": 0.4572,
417
+ "step": 560
418
+ },
419
+ {
420
+ "epoch": 6.705882352941177,
421
+ "grad_norm": 9.285310745239258,
422
+ "learning_rate": 2.554830039650834e-06,
423
+ "loss": 0.5448,
424
+ "step": 570
425
+ },
426
+ {
427
+ "epoch": 6.823529411764706,
428
+ "grad_norm": 7.98577880859375,
429
+ "learning_rate": 2.391542453997578e-06,
430
+ "loss": 0.5508,
431
+ "step": 580
432
+ },
433
+ {
434
+ "epoch": 6.9411764705882355,
435
+ "grad_norm": 9.253005981445312,
436
+ "learning_rate": 2.2319914586525776e-06,
437
+ "loss": 0.4691,
438
+ "step": 590
439
+ },
440
+ {
441
+ "epoch": 7.0588235294117645,
442
+ "grad_norm": 7.2772321701049805,
443
+ "learning_rate": 2.0764056088797646e-06,
444
+ "loss": 0.5116,
445
+ "step": 600
446
+ },
447
+ {
448
+ "epoch": 7.0588235294117645,
449
+ "eval_loss": 0.6910275816917419,
450
+ "eval_runtime": 3.2498,
451
+ "eval_samples_per_second": 73.85,
452
+ "eval_steps_per_second": 4.616,
453
+ "step": 600
454
+ },
455
+ {
456
+ "epoch": 7.176470588235294,
457
+ "grad_norm": 10.038715362548828,
458
+ "learning_rate": 1.9250077799102323e-06,
459
+ "loss": 0.4648,
460
+ "step": 610
461
+ },
462
+ {
463
+ "epoch": 7.294117647058823,
464
+ "grad_norm": 8.721668243408203,
465
+ "learning_rate": 1.7780148476756148e-06,
466
+ "loss": 0.4766,
467
+ "step": 620
468
+ },
469
+ {
470
+ "epoch": 7.411764705882353,
471
+ "grad_norm": 7.398141384124756,
472
+ "learning_rate": 1.6356373781354058e-06,
473
+ "loss": 0.4604,
474
+ "step": 630
475
+ },
476
+ {
477
+ "epoch": 7.529411764705882,
478
+ "grad_norm": 7.99430513381958,
479
+ "learning_rate": 1.4980793256432474e-06,
480
+ "loss": 0.4531,
481
+ "step": 640
482
+ },
483
+ {
484
+ "epoch": 7.647058823529412,
485
+ "grad_norm": 10.882220268249512,
486
+ "learning_rate": 1.3655377407842813e-06,
487
+ "loss": 0.496,
488
+ "step": 650
489
+ },
490
+ {
491
+ "epoch": 7.764705882352941,
492
+ "grad_norm": 8.812675476074219,
493
+ "learning_rate": 1.2382024881020937e-06,
494
+ "loss": 0.4421,
495
+ "step": 660
496
+ },
497
+ {
498
+ "epoch": 7.882352941176471,
499
+ "grad_norm": 6.555144786834717,
500
+ "learning_rate": 1.1162559741195733e-06,
501
+ "loss": 0.4582,
502
+ "step": 670
503
+ },
504
+ {
505
+ "epoch": 8.0,
506
+ "grad_norm": 8.672150611877441,
507
+ "learning_rate": 9.998728860433277e-07,
508
+ "loss": 0.4808,
509
+ "step": 680
510
+ },
511
+ {
512
+ "epoch": 8.117647058823529,
513
+ "grad_norm": 6.934388160705566,
514
+ "learning_rate": 8.892199415259501e-07,
515
+ "loss": 0.4496,
516
+ "step": 690
517
+ },
518
+ {
519
+ "epoch": 8.235294117647058,
520
+ "grad_norm": 7.697629451751709,
521
+ "learning_rate": 7.844556498445788e-07,
522
+ "loss": 0.3856,
523
+ "step": 700
524
+ },
525
+ {
526
+ "epoch": 8.352941176470589,
527
+ "grad_norm": 8.46208381652832,
528
+ "learning_rate": 6.857300848378857e-07,
529
+ "loss": 0.4586,
530
+ "step": 710
531
+ },
532
+ {
533
+ "epoch": 8.470588235294118,
534
+ "grad_norm": 9.014086723327637,
535
+ "learning_rate": 5.931846699267558e-07,
536
+ "loss": 0.4557,
537
+ "step": 720
538
+ },
539
+ {
540
+ "epoch": 8.588235294117647,
541
+ "grad_norm": 8.568846702575684,
542
+ "learning_rate": 5.0695197552659e-07,
543
+ "loss": 0.4591,
544
+ "step": 730
545
+ },
546
+ {
547
+ "epoch": 8.705882352941176,
548
+ "grad_norm": 7.788153171539307,
549
+ "learning_rate": 4.271555291414636e-07,
550
+ "loss": 0.4422,
551
+ "step": 740
552
+ },
553
+ {
554
+ "epoch": 8.823529411764707,
555
+ "grad_norm": 6.4394354820251465,
556
+ "learning_rate": 3.539096384121743e-07,
557
+ "loss": 0.4336,
558
+ "step": 750
559
+ },
560
+ {
561
+ "epoch": 8.941176470588236,
562
+ "grad_norm": 8.08786678314209,
563
+ "learning_rate": 2.873192273716369e-07,
564
+ "loss": 0.4929,
565
+ "step": 760
566
+ },
567
+ {
568
+ "epoch": 9.058823529411764,
569
+ "grad_norm": 7.791020393371582,
570
+ "learning_rate": 2.274796861422246e-07,
571
+ "loss": 0.4903,
572
+ "step": 770
573
+ },
574
+ {
575
+ "epoch": 9.176470588235293,
576
+ "grad_norm": 7.249619007110596,
577
+ "learning_rate": 1.7447673429033361e-07,
578
+ "loss": 0.4236,
579
+ "step": 780
580
+ },
581
+ {
582
+ "epoch": 9.294117647058824,
583
+ "grad_norm": 8.345190048217773,
584
+ "learning_rate": 1.2838629803393343e-07,
585
+ "loss": 0.4297,
586
+ "step": 790
587
+ },
588
+ {
589
+ "epoch": 9.411764705882353,
590
+ "grad_norm": 6.6279706954956055,
591
+ "learning_rate": 8.927440147898703e-08,
592
+ "loss": 0.446,
593
+ "step": 800
594
+ },
595
+ {
596
+ "epoch": 9.411764705882353,
597
+ "eval_loss": 0.7020273804664612,
598
+ "eval_runtime": 3.248,
599
+ "eval_samples_per_second": 73.892,
600
+ "eval_steps_per_second": 4.618,
601
+ "step": 800
602
+ },
603
+ {
604
+ "epoch": 9.529411764705882,
605
+ "grad_norm": 10.41592025756836,
606
+ "learning_rate": 5.7197072040557356e-08,
607
+ "loss": 0.4399,
608
+ "step": 810
609
+ },
610
+ {
611
+ "epoch": 9.647058823529411,
612
+ "grad_norm": 7.568772315979004,
613
+ "learning_rate": 3.220026018407541e-08,
614
+ "loss": 0.4143,
615
+ "step": 820
616
+ },
617
+ {
618
+ "epoch": 9.764705882352942,
619
+ "grad_norm": 9.008621215820312,
620
+ "learning_rate": 1.431977360173975e-08,
621
+ "loss": 0.4763,
622
+ "step": 830
623
+ },
624
+ {
625
+ "epoch": 9.882352941176471,
626
+ "grad_norm": 7.217759609222412,
627
+ "learning_rate": 3.5812259183426457e-09,
628
+ "loss": 0.4269,
629
+ "step": 840
630
+ },
631
+ {
632
+ "epoch": 10.0,
633
+ "grad_norm": 10.685968399047852,
634
+ "learning_rate": 0.0,
635
+ "loss": 0.504,
636
+ "step": 850
637
+ },
638
+ {
639
+ "epoch": 10.0,
640
+ "step": 850,
641
+ "total_flos": 1.965855597354025e+17,
642
+ "train_loss": 0.7394972818038043,
643
+ "train_runtime": 623.5699,
644
+ "train_samples_per_second": 21.81,
645
+ "train_steps_per_second": 1.363
646
+ }
647
+ ],
648
+ "logging_steps": 10,
649
+ "max_steps": 850,
650
+ "num_input_tokens_seen": 0,
651
+ "num_train_epochs": 10,
652
+ "save_steps": 1000,
653
+ "total_flos": 1.965855597354025e+17,
654
+ "train_batch_size": 8,
655
+ "trial_name": null,
656
+ "trial_params": null
657
+ }
llama3_8b_peft/linguistics_puzzles/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0ad7870ec25f99470ceb4dbbe449742b6f94851dc8a0c9c7ecca958a3c6dd9
3
+ size 5176
llama3_8b_peft/linguistics_puzzles/training_eval_loss.png ADDED
llama3_8b_peft/linguistics_puzzles/training_loss.png ADDED
llama3_8b_peft/news_commentary_it/README.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: news_commentary_it_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # news_commentary_it_no_sys
18
+
19
+ This model is a fine-tuned version of the base model stored at the local path `/data1/model/llama3/unsloth/Llama3-8b` on the news_commentary_it_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.7665
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 1e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 2
46
+ - total_train_batch_size: 16
47
+ - total_eval_batch_size: 16
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: cosine
50
+ - lr_scheduler_warmup_steps: 20
51
+ - num_epochs: 10.0
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss |
56
+ |:-------------:|:------:|:----:|:---------------:|
57
+ | 0.8384 | 0.1255 | 200 | 0.8427 |
58
+ | 0.8033 | 0.2509 | 400 | 0.8175 |
59
+ | 0.747 | 0.3764 | 600 | 0.8063 |
60
+ | 0.8685 | 0.5019 | 800 | 0.7983 |
61
+ | 0.7784 | 0.6274 | 1000 | 0.7934 |
62
+ | 0.7192 | 0.7528 | 1200 | 0.7873 |
63
+ | 0.7894 | 0.8783 | 1400 | 0.7846 |
64
+ | 0.7502 | 1.0038 | 1600 | 0.7800 |
65
+ | 0.7253 | 1.1292 | 1800 | 0.7777 |
66
+ | 0.7756 | 1.2547 | 2000 | 0.7758 |
67
+ | 0.7199 | 1.3802 | 2200 | 0.7733 |
68
+ | 0.711 | 1.5056 | 2400 | 0.7718 |
69
+ | 0.7531 | 1.6311 | 2600 | 0.7695 |
70
+ | 0.7423 | 1.7566 | 2800 | 0.7680 |
71
+ | 0.6911 | 1.8821 | 3000 | 0.7665 |
72
+ | 0.7206 | 2.0075 | 3200 | 0.7645 |
73
+ | 0.7139 | 2.1330 | 3400 | 0.7690 |
74
+ | 0.6934 | 2.2585 | 3600 | 0.7689 |
75
+ | 0.6709 | 2.3839 | 3800 | 0.7715 |
76
+ | 0.6639 | 2.5094 | 4000 | 0.7693 |
77
+
78
+
79
+ ### Framework versions
80
+
81
+ - PEFT 0.10.0
82
+ - Transformers 4.40.0
83
+ - Pytorch 2.2.1
84
+ - Datasets 2.18.0
85
+ - Tokenizers 0.19.1
llama3_8b_peft/news_commentary_it/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "k_proj",
24
+ "q_proj",
25
+ "gate_proj",
26
+ "o_proj",
27
+ "down_proj",
28
+ "v_proj",
29
+ "up_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/news_commentary_it/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a6f4787c96093ec108afd438955e977ac1a5be1ffbc662c92b73482d55d8d0e
3
+ size 83945296
llama3_8b_peft/news_commentary_it/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.509410288582183,
3
+ "eval_loss": 0.766462504863739,
4
+ "eval_runtime": 45.0811,
5
+ "eval_samples_per_second": 99.82,
6
+ "eval_steps_per_second": 6.255,
7
+ "total_flos": 5.277760478993449e+17,
8
+ "train_loss": 0.7607251715660095,
9
+ "train_runtime": 2991.8413,
10
+ "train_samples_per_second": 85.232,
11
+ "train_steps_per_second": 5.328
12
+ }
llama3_8b_peft/news_commentary_it/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.509410288582183,
3
+ "eval_loss": 0.766462504863739,
4
+ "eval_runtime": 45.0811,
5
+ "eval_samples_per_second": 99.82,
6
+ "eval_steps_per_second": 6.255
7
+ }
llama3_8b_peft/news_commentary_it/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/news_commentary_it/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/news_commentary_it/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }
llama3_8b_peft/news_commentary_it/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.509410288582183,
3
+ "total_flos": 5.277760478993449e+17,
4
+ "train_loss": 0.7607251715660095,
5
+ "train_runtime": 2991.8413,
6
+ "train_samples_per_second": 85.232,
7
+ "train_steps_per_second": 5.328
8
+ }
llama3_8b_peft/news_commentary_it/trainer_log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/news_commentary_it/trainer_state.json ADDED
@@ -0,0 +1,2990 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.766462504863739,
3
+ "best_model_checkpoint": "ckpt/llama3_8b_fuze27_no_sys/news_commentary_it_no_sys/checkpoint-3000",
4
+ "epoch": 2.509410288582183,
5
+ "eval_steps": 200,
6
+ "global_step": 4000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.006273525721455458,
13
+ "grad_norm": 1.2104142904281616,
14
+ "learning_rate": 5e-06,
15
+ "loss": 1.2602,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.012547051442910916,
20
+ "grad_norm": 1.1289334297180176,
21
+ "learning_rate": 1e-05,
22
+ "loss": 1.2867,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.018820577164366373,
27
+ "grad_norm": 2.0024657249450684,
28
+ "learning_rate": 9.999990264607035e-06,
29
+ "loss": 1.2463,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.025094102885821833,
34
+ "grad_norm": 0.9999326467514038,
35
+ "learning_rate": 9.999961058466052e-06,
36
+ "loss": 1.0827,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.03136762860727729,
41
+ "grad_norm": 0.9281232953071594,
42
+ "learning_rate": 9.999912381690781e-06,
43
+ "loss": 1.0075,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.037641154328732745,
48
+ "grad_norm": 1.1647788286209106,
49
+ "learning_rate": 9.999844234470782e-06,
50
+ "loss": 0.9581,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.043914680050188205,
55
+ "grad_norm": 1.1726807355880737,
56
+ "learning_rate": 9.999756617071427e-06,
57
+ "loss": 0.9374,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.050188205771643665,
62
+ "grad_norm": 1.0420453548431396,
63
+ "learning_rate": 9.999649529833915e-06,
64
+ "loss": 0.9236,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.056461731493099125,
69
+ "grad_norm": 1.0071817636489868,
70
+ "learning_rate": 9.999522973175257e-06,
71
+ "loss": 0.8694,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.06273525721455459,
76
+ "grad_norm": 1.1347712278366089,
77
+ "learning_rate": 9.999376947588288e-06,
78
+ "loss": 0.8946,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.06900878293601004,
83
+ "grad_norm": 0.9570572972297668,
84
+ "learning_rate": 9.99921145364165e-06,
85
+ "loss": 0.9117,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.07528230865746549,
90
+ "grad_norm": 1.4362834692001343,
91
+ "learning_rate": 9.999026491979809e-06,
92
+ "loss": 0.8481,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.08155583437892096,
97
+ "grad_norm": 1.6468892097473145,
98
+ "learning_rate": 9.99882206332303e-06,
99
+ "loss": 0.9129,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.08782936010037641,
104
+ "grad_norm": 1.1469831466674805,
105
+ "learning_rate": 9.99859816846739e-06,
106
+ "loss": 0.8671,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.09410288582183186,
111
+ "grad_norm": 1.1499881744384766,
112
+ "learning_rate": 9.998354808284774e-06,
113
+ "loss": 0.8156,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.10037641154328733,
118
+ "grad_norm": 1.2852425575256348,
119
+ "learning_rate": 9.998091983722862e-06,
120
+ "loss": 0.875,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.10664993726474278,
125
+ "grad_norm": 1.1644378900527954,
126
+ "learning_rate": 9.997809695805136e-06,
127
+ "loss": 0.8921,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.11292346298619825,
132
+ "grad_norm": 1.0868431329727173,
133
+ "learning_rate": 9.99750794563087e-06,
134
+ "loss": 0.8171,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.1191969887076537,
139
+ "grad_norm": 1.2835919857025146,
140
+ "learning_rate": 9.997186734375124e-06,
141
+ "loss": 0.7668,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.12547051442910917,
146
+ "grad_norm": 1.230720043182373,
147
+ "learning_rate": 9.996846063288746e-06,
148
+ "loss": 0.8384,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.12547051442910917,
153
+ "eval_loss": 0.8426533937454224,
154
+ "eval_runtime": 44.7105,
155
+ "eval_samples_per_second": 100.648,
156
+ "eval_steps_per_second": 6.307,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.13174404015056462,
161
+ "grad_norm": 1.0766535997390747,
162
+ "learning_rate": 9.996485933698364e-06,
163
+ "loss": 0.7462,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 0.13801756587202008,
168
+ "grad_norm": 1.3286361694335938,
169
+ "learning_rate": 9.996106347006378e-06,
170
+ "loss": 0.7903,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 0.14429109159347553,
175
+ "grad_norm": 1.2919267416000366,
176
+ "learning_rate": 9.99570730469096e-06,
177
+ "loss": 0.8335,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.15056461731493098,
182
+ "grad_norm": 1.795588731765747,
183
+ "learning_rate": 9.995288808306041e-06,
184
+ "loss": 0.8329,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 0.15683814303638646,
189
+ "grad_norm": 1.1396249532699585,
190
+ "learning_rate": 9.994850859481312e-06,
191
+ "loss": 0.8034,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 0.16311166875784192,
196
+ "grad_norm": 1.3285330533981323,
197
+ "learning_rate": 9.994393459922219e-06,
198
+ "loss": 0.8021,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 0.16938519447929737,
203
+ "grad_norm": 1.16619873046875,
204
+ "learning_rate": 9.993916611409941e-06,
205
+ "loss": 0.7863,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 0.17565872020075282,
210
+ "grad_norm": 1.4060382843017578,
211
+ "learning_rate": 9.993420315801406e-06,
212
+ "loss": 0.8201,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 0.18193224592220827,
217
+ "grad_norm": 1.353031039237976,
218
+ "learning_rate": 9.992904575029265e-06,
219
+ "loss": 0.8247,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 0.18820577164366373,
224
+ "grad_norm": 1.584989309310913,
225
+ "learning_rate": 9.992369391101895e-06,
226
+ "loss": 0.8314,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.1944792973651192,
231
+ "grad_norm": 1.7190700769424438,
232
+ "learning_rate": 9.991814766103386e-06,
233
+ "loss": 0.7779,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 0.20075282308657466,
238
+ "grad_norm": 0.9778804183006287,
239
+ "learning_rate": 9.991240702193532e-06,
240
+ "loss": 0.7836,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 0.20702634880803011,
245
+ "grad_norm": 1.2949808835983276,
246
+ "learning_rate": 9.99064720160783e-06,
247
+ "loss": 0.8216,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 0.21329987452948557,
252
+ "grad_norm": 1.5262751579284668,
253
+ "learning_rate": 9.990034266657468e-06,
254
+ "loss": 0.825,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 0.21957340025094102,
259
+ "grad_norm": 1.608786940574646,
260
+ "learning_rate": 9.989401899729307e-06,
261
+ "loss": 0.8446,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 0.2258469259723965,
266
+ "grad_norm": 1.8623838424682617,
267
+ "learning_rate": 9.988750103285883e-06,
268
+ "loss": 0.8312,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 0.23212045169385195,
273
+ "grad_norm": 1.2693721055984497,
274
+ "learning_rate": 9.988078879865396e-06,
275
+ "loss": 0.837,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 0.2383939774153074,
280
+ "grad_norm": 1.1806577444076538,
281
+ "learning_rate": 9.987388232081694e-06,
282
+ "loss": 0.7946,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 0.24466750313676286,
287
+ "grad_norm": 1.4412263631820679,
288
+ "learning_rate": 9.98667816262427e-06,
289
+ "loss": 0.8401,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 0.25094102885821834,
294
+ "grad_norm": 1.6300259828567505,
295
+ "learning_rate": 9.985948674258243e-06,
296
+ "loss": 0.8033,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 0.25094102885821834,
301
+ "eval_loss": 0.8175402283668518,
302
+ "eval_runtime": 44.9755,
303
+ "eval_samples_per_second": 100.055,
304
+ "eval_steps_per_second": 6.27,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 0.2572145545796738,
309
+ "grad_norm": 1.5807448625564575,
310
+ "learning_rate": 9.985199769824359e-06,
311
+ "loss": 0.7178,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 0.26348808030112925,
316
+ "grad_norm": 1.4177653789520264,
317
+ "learning_rate": 9.984431452238968e-06,
318
+ "loss": 0.8392,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 0.2697616060225847,
323
+ "grad_norm": 1.7573360204696655,
324
+ "learning_rate": 9.983643724494017e-06,
325
+ "loss": 0.8,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 0.27603513174404015,
330
+ "grad_norm": 1.576133131980896,
331
+ "learning_rate": 9.982836589657043e-06,
332
+ "loss": 0.78,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 0.2823086574654956,
337
+ "grad_norm": 1.1506612300872803,
338
+ "learning_rate": 9.98201005087116e-06,
339
+ "loss": 0.7949,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 0.28858218318695106,
344
+ "grad_norm": 1.5310416221618652,
345
+ "learning_rate": 9.981164111355036e-06,
346
+ "loss": 0.7949,
347
+ "step": 460
348
+ },
349
+ {
350
+ "epoch": 0.2948557089084065,
351
+ "grad_norm": 1.714012861251831,
352
+ "learning_rate": 9.98029877440289e-06,
353
+ "loss": 0.8617,
354
+ "step": 470
355
+ },
356
+ {
357
+ "epoch": 0.30112923462986196,
358
+ "grad_norm": 1.2436264753341675,
359
+ "learning_rate": 9.979414043384485e-06,
360
+ "loss": 0.807,
361
+ "step": 480
362
+ },
363
+ {
364
+ "epoch": 0.3074027603513174,
365
+ "grad_norm": 1.6324926614761353,
366
+ "learning_rate": 9.978509921745101e-06,
367
+ "loss": 0.8101,
368
+ "step": 490
369
+ },
370
+ {
371
+ "epoch": 0.3136762860727729,
372
+ "grad_norm": 1.8093546628952026,
373
+ "learning_rate": 9.97758641300553e-06,
374
+ "loss": 0.8332,
375
+ "step": 500
376
+ },
377
+ {
378
+ "epoch": 0.3199498117942284,
379
+ "grad_norm": 1.3757269382476807,
380
+ "learning_rate": 9.97664352076206e-06,
381
+ "loss": 0.7836,
382
+ "step": 510
383
+ },
384
+ {
385
+ "epoch": 0.32622333751568383,
386
+ "grad_norm": 1.6645439863204956,
387
+ "learning_rate": 9.97568124868646e-06,
388
+ "loss": 0.8991,
389
+ "step": 520
390
+ },
391
+ {
392
+ "epoch": 0.3324968632371393,
393
+ "grad_norm": 1.5574431419372559,
394
+ "learning_rate": 9.974699600525972e-06,
395
+ "loss": 0.8219,
396
+ "step": 530
397
+ },
398
+ {
399
+ "epoch": 0.33877038895859474,
400
+ "grad_norm": 1.915824055671692,
401
+ "learning_rate": 9.973698580103286e-06,
402
+ "loss": 0.7896,
403
+ "step": 540
404
+ },
405
+ {
406
+ "epoch": 0.3450439146800502,
407
+ "grad_norm": 2.2009265422821045,
408
+ "learning_rate": 9.972678191316533e-06,
409
+ "loss": 0.8181,
410
+ "step": 550
411
+ },
412
+ {
413
+ "epoch": 0.35131744040150564,
414
+ "grad_norm": 1.163510799407959,
415
+ "learning_rate": 9.971638438139266e-06,
416
+ "loss": 0.7855,
417
+ "step": 560
418
+ },
419
+ {
420
+ "epoch": 0.3575909661229611,
421
+ "grad_norm": 1.4998867511749268,
422
+ "learning_rate": 9.97057932462045e-06,
423
+ "loss": 0.804,
424
+ "step": 570
425
+ },
426
+ {
427
+ "epoch": 0.36386449184441655,
428
+ "grad_norm": 1.5082648992538452,
429
+ "learning_rate": 9.96950085488444e-06,
430
+ "loss": 0.746,
431
+ "step": 580
432
+ },
433
+ {
434
+ "epoch": 0.370138017565872,
435
+ "grad_norm": 1.9336061477661133,
436
+ "learning_rate": 9.968403033130963e-06,
437
+ "loss": 0.8339,
438
+ "step": 590
439
+ },
440
+ {
441
+ "epoch": 0.37641154328732745,
442
+ "grad_norm": 1.1680142879486084,
443
+ "learning_rate": 9.967285863635112e-06,
444
+ "loss": 0.747,
445
+ "step": 600
446
+ },
447
+ {
448
+ "epoch": 0.37641154328732745,
449
+ "eval_loss": 0.8062916994094849,
450
+ "eval_runtime": 44.9586,
451
+ "eval_samples_per_second": 100.092,
452
+ "eval_steps_per_second": 6.272,
453
+ "step": 600
454
+ },
455
+ {
456
+ "epoch": 0.38268506900878296,
457
+ "grad_norm": 1.6282708644866943,
458
+ "learning_rate": 9.966149350747321e-06,
459
+ "loss": 0.8439,
460
+ "step": 610
461
+ },
462
+ {
463
+ "epoch": 0.3889585947302384,
464
+ "grad_norm": 2.040297746658325,
465
+ "learning_rate": 9.964993498893349e-06,
466
+ "loss": 0.8309,
467
+ "step": 620
468
+ },
469
+ {
470
+ "epoch": 0.39523212045169387,
471
+ "grad_norm": 2.0438711643218994,
472
+ "learning_rate": 9.963818312574265e-06,
473
+ "loss": 0.8116,
474
+ "step": 630
475
+ },
476
+ {
477
+ "epoch": 0.4015056461731493,
478
+ "grad_norm": 1.4836337566375732,
479
+ "learning_rate": 9.962623796366428e-06,
480
+ "loss": 0.8543,
481
+ "step": 640
482
+ },
483
+ {
484
+ "epoch": 0.4077791718946048,
485
+ "grad_norm": 1.824879765510559,
486
+ "learning_rate": 9.961409954921472e-06,
487
+ "loss": 0.84,
488
+ "step": 650
489
+ },
490
+ {
491
+ "epoch": 0.41405269761606023,
492
+ "grad_norm": 1.2984076738357544,
493
+ "learning_rate": 9.96017679296629e-06,
494
+ "loss": 0.7914,
495
+ "step": 660
496
+ },
497
+ {
498
+ "epoch": 0.4203262233375157,
499
+ "grad_norm": 1.591729998588562,
500
+ "learning_rate": 9.958924315303005e-06,
501
+ "loss": 0.7888,
502
+ "step": 670
503
+ },
504
+ {
505
+ "epoch": 0.42659974905897113,
506
+ "grad_norm": 1.267513394355774,
507
+ "learning_rate": 9.95765252680896e-06,
508
+ "loss": 0.7764,
509
+ "step": 680
510
+ },
511
+ {
512
+ "epoch": 0.4328732747804266,
513
+ "grad_norm": 1.9557582139968872,
514
+ "learning_rate": 9.956361432436705e-06,
515
+ "loss": 0.7743,
516
+ "step": 690
517
+ },
518
+ {
519
+ "epoch": 0.43914680050188204,
520
+ "grad_norm": 1.376629114151001,
521
+ "learning_rate": 9.95505103721396e-06,
522
+ "loss": 0.7849,
523
+ "step": 700
524
+ },
525
+ {
526
+ "epoch": 0.4454203262233375,
527
+ "grad_norm": 2.3298721313476562,
528
+ "learning_rate": 9.953721346243613e-06,
529
+ "loss": 0.8004,
530
+ "step": 710
531
+ },
532
+ {
533
+ "epoch": 0.451693851944793,
534
+ "grad_norm": 1.447426676750183,
535
+ "learning_rate": 9.952372364703688e-06,
536
+ "loss": 0.7069,
537
+ "step": 720
538
+ },
539
+ {
540
+ "epoch": 0.45796737766624845,
541
+ "grad_norm": 1.4724068641662598,
542
+ "learning_rate": 9.95100409784733e-06,
543
+ "loss": 0.7746,
544
+ "step": 730
545
+ },
546
+ {
547
+ "epoch": 0.4642409033877039,
548
+ "grad_norm": 1.3427478075027466,
549
+ "learning_rate": 9.949616551002787e-06,
550
+ "loss": 0.8052,
551
+ "step": 740
552
+ },
553
+ {
554
+ "epoch": 0.47051442910915936,
555
+ "grad_norm": 2.371738910675049,
556
+ "learning_rate": 9.948209729573384e-06,
557
+ "loss": 0.7864,
558
+ "step": 750
559
+ },
560
+ {
561
+ "epoch": 0.4767879548306148,
562
+ "grad_norm": 1.5304419994354248,
563
+ "learning_rate": 9.946783639037503e-06,
564
+ "loss": 0.7596,
565
+ "step": 760
566
+ },
567
+ {
568
+ "epoch": 0.48306148055207027,
569
+ "grad_norm": 1.9773509502410889,
570
+ "learning_rate": 9.945338284948568e-06,
571
+ "loss": 0.8651,
572
+ "step": 770
573
+ },
574
+ {
575
+ "epoch": 0.4893350062735257,
576
+ "grad_norm": 1.6860358715057373,
577
+ "learning_rate": 9.943873672935014e-06,
578
+ "loss": 0.8396,
579
+ "step": 780
580
+ },
581
+ {
582
+ "epoch": 0.49560853199498117,
583
+ "grad_norm": 1.711647868156433,
584
+ "learning_rate": 9.94238980870027e-06,
585
+ "loss": 0.7831,
586
+ "step": 790
587
+ },
588
+ {
589
+ "epoch": 0.5018820577164367,
590
+ "grad_norm": 1.318320870399475,
591
+ "learning_rate": 9.940886698022733e-06,
592
+ "loss": 0.8685,
593
+ "step": 800
594
+ },
595
+ {
596
+ "epoch": 0.5018820577164367,
597
+ "eval_loss": 0.7982689738273621,
598
+ "eval_runtime": 45.1005,
599
+ "eval_samples_per_second": 99.777,
600
+ "eval_steps_per_second": 6.253,
601
+ "step": 800
602
+ },
603
+ {
604
+ "epoch": 0.5081555834378921,
605
+ "grad_norm": 1.4997131824493408,
606
+ "learning_rate": 9.93936434675576e-06,
607
+ "loss": 0.795,
608
+ "step": 810
609
+ },
610
+ {
611
+ "epoch": 0.5144291091593476,
612
+ "grad_norm": 1.4836647510528564,
613
+ "learning_rate": 9.93782276082762e-06,
614
+ "loss": 0.8058,
615
+ "step": 820
616
+ },
617
+ {
618
+ "epoch": 0.520702634880803,
619
+ "grad_norm": 1.9538437128067017,
620
+ "learning_rate": 9.936261946241492e-06,
621
+ "loss": 0.8111,
622
+ "step": 830
623
+ },
624
+ {
625
+ "epoch": 0.5269761606022585,
626
+ "grad_norm": 1.628666639328003,
627
+ "learning_rate": 9.934681909075434e-06,
628
+ "loss": 0.8313,
629
+ "step": 840
630
+ },
631
+ {
632
+ "epoch": 0.533249686323714,
633
+ "grad_norm": 1.8225489854812622,
634
+ "learning_rate": 9.93308265548236e-06,
635
+ "loss": 0.786,
636
+ "step": 850
637
+ },
638
+ {
639
+ "epoch": 0.5395232120451694,
640
+ "grad_norm": 1.4251854419708252,
641
+ "learning_rate": 9.931464191690015e-06,
642
+ "loss": 0.7471,
643
+ "step": 860
644
+ },
645
+ {
646
+ "epoch": 0.5457967377666249,
647
+ "grad_norm": 1.6992876529693604,
648
+ "learning_rate": 9.929826524000948e-06,
649
+ "loss": 0.7997,
650
+ "step": 870
651
+ },
652
+ {
653
+ "epoch": 0.5520702634880803,
654
+ "grad_norm": 1.8453644514083862,
655
+ "learning_rate": 9.928169658792498e-06,
656
+ "loss": 0.8167,
657
+ "step": 880
658
+ },
659
+ {
660
+ "epoch": 0.5583437892095358,
661
+ "grad_norm": 1.670644998550415,
662
+ "learning_rate": 9.926493602516758e-06,
663
+ "loss": 0.7689,
664
+ "step": 890
665
+ },
666
+ {
667
+ "epoch": 0.5646173149309912,
668
+ "grad_norm": 1.8147343397140503,
669
+ "learning_rate": 9.924798361700554e-06,
670
+ "loss": 0.7747,
671
+ "step": 900
672
+ },
673
+ {
674
+ "epoch": 0.5708908406524467,
675
+ "grad_norm": 1.320773720741272,
676
+ "learning_rate": 9.923083942945419e-06,
677
+ "loss": 0.8306,
678
+ "step": 910
679
+ },
680
+ {
681
+ "epoch": 0.5771643663739021,
682
+ "grad_norm": 1.4833663702011108,
683
+ "learning_rate": 9.92135035292757e-06,
684
+ "loss": 0.8039,
685
+ "step": 920
686
+ },
687
+ {
688
+ "epoch": 0.5834378920953576,
689
+ "grad_norm": 1.6530499458312988,
690
+ "learning_rate": 9.919597598397882e-06,
691
+ "loss": 0.8893,
692
+ "step": 930
693
+ },
694
+ {
695
+ "epoch": 0.589711417816813,
696
+ "grad_norm": 1.5760563611984253,
697
+ "learning_rate": 9.91782568618185e-06,
698
+ "loss": 0.7866,
699
+ "step": 940
700
+ },
701
+ {
702
+ "epoch": 0.5959849435382685,
703
+ "grad_norm": 1.702140212059021,
704
+ "learning_rate": 9.916034623179584e-06,
705
+ "loss": 0.7815,
706
+ "step": 950
707
+ },
708
+ {
709
+ "epoch": 0.6022584692597239,
710
+ "grad_norm": 2.266418933868408,
711
+ "learning_rate": 9.914224416365765e-06,
712
+ "loss": 0.8233,
713
+ "step": 960
714
+ },
715
+ {
716
+ "epoch": 0.6085319949811794,
717
+ "grad_norm": 1.9135664701461792,
718
+ "learning_rate": 9.91239507278962e-06,
719
+ "loss": 0.7989,
720
+ "step": 970
721
+ },
722
+ {
723
+ "epoch": 0.6148055207026348,
724
+ "grad_norm": 2.0764544010162354,
725
+ "learning_rate": 9.910546599574903e-06,
726
+ "loss": 0.8017,
727
+ "step": 980
728
+ },
729
+ {
730
+ "epoch": 0.6210790464240903,
731
+ "grad_norm": 2.2536275386810303,
732
+ "learning_rate": 9.908679003919856e-06,
733
+ "loss": 0.7417,
734
+ "step": 990
735
+ },
736
+ {
737
+ "epoch": 0.6273525721455459,
738
+ "grad_norm": 1.5561891794204712,
739
+ "learning_rate": 9.906792293097194e-06,
740
+ "loss": 0.7784,
741
+ "step": 1000
742
+ },
743
+ {
744
+ "epoch": 0.6273525721455459,
745
+ "eval_loss": 0.7933768630027771,
746
+ "eval_runtime": 45.2372,
747
+ "eval_samples_per_second": 99.476,
748
+ "eval_steps_per_second": 6.234,
749
+ "step": 1000
750
+ },
751
+ {
752
+ "epoch": 0.6336260978670013,
753
+ "grad_norm": 1.9871364831924438,
754
+ "learning_rate": 9.904886474454063e-06,
755
+ "loss": 0.7728,
756
+ "step": 1010
757
+ },
758
+ {
759
+ "epoch": 0.6398996235884568,
760
+ "grad_norm": 1.6717829704284668,
761
+ "learning_rate": 9.90296155541202e-06,
762
+ "loss": 0.7703,
763
+ "step": 1020
764
+ },
765
+ {
766
+ "epoch": 0.6461731493099122,
767
+ "grad_norm": 1.9863123893737793,
768
+ "learning_rate": 9.901017543467005e-06,
769
+ "loss": 0.7908,
770
+ "step": 1030
771
+ },
772
+ {
773
+ "epoch": 0.6524466750313677,
774
+ "grad_norm": 1.7507809400558472,
775
+ "learning_rate": 9.899054446189305e-06,
776
+ "loss": 0.7643,
777
+ "step": 1040
778
+ },
779
+ {
780
+ "epoch": 0.6587202007528231,
781
+ "grad_norm": 1.9848119020462036,
782
+ "learning_rate": 9.897072271223526e-06,
783
+ "loss": 0.8438,
784
+ "step": 1050
785
+ },
786
+ {
787
+ "epoch": 0.6649937264742786,
788
+ "grad_norm": 1.7975581884384155,
789
+ "learning_rate": 9.895071026288574e-06,
790
+ "loss": 0.7795,
791
+ "step": 1060
792
+ },
793
+ {
794
+ "epoch": 0.671267252195734,
795
+ "grad_norm": 1.4088472127914429,
796
+ "learning_rate": 9.893050719177608e-06,
797
+ "loss": 0.7994,
798
+ "step": 1070
799
+ },
800
+ {
801
+ "epoch": 0.6775407779171895,
802
+ "grad_norm": 1.8958938121795654,
803
+ "learning_rate": 9.891011357758022e-06,
804
+ "loss": 0.7811,
805
+ "step": 1080
806
+ },
807
+ {
808
+ "epoch": 0.6838143036386449,
809
+ "grad_norm": 1.9438494443893433,
810
+ "learning_rate": 9.888952949971411e-06,
811
+ "loss": 0.7872,
812
+ "step": 1090
813
+ },
814
+ {
815
+ "epoch": 0.6900878293601004,
816
+ "grad_norm": 1.719403862953186,
817
+ "learning_rate": 9.886875503833537e-06,
818
+ "loss": 0.8298,
819
+ "step": 1100
820
+ },
821
+ {
822
+ "epoch": 0.6963613550815558,
823
+ "grad_norm": 1.8520464897155762,
824
+ "learning_rate": 9.884779027434304e-06,
825
+ "loss": 0.8422,
826
+ "step": 1110
827
+ },
828
+ {
829
+ "epoch": 0.7026348808030113,
830
+ "grad_norm": 2.0593104362487793,
831
+ "learning_rate": 9.882663528937716e-06,
832
+ "loss": 0.8111,
833
+ "step": 1120
834
+ },
835
+ {
836
+ "epoch": 0.7089084065244667,
837
+ "grad_norm": 1.8102694749832153,
838
+ "learning_rate": 9.880529016581863e-06,
839
+ "loss": 0.8086,
840
+ "step": 1130
841
+ },
842
+ {
843
+ "epoch": 0.7151819322459222,
844
+ "grad_norm": 1.670199990272522,
845
+ "learning_rate": 9.878375498678869e-06,
846
+ "loss": 0.77,
847
+ "step": 1140
848
+ },
849
+ {
850
+ "epoch": 0.7214554579673776,
851
+ "grad_norm": 1.8369115591049194,
852
+ "learning_rate": 9.876202983614868e-06,
853
+ "loss": 0.7775,
854
+ "step": 1150
855
+ },
856
+ {
857
+ "epoch": 0.7277289836888331,
858
+ "grad_norm": 1.802253007888794,
859
+ "learning_rate": 9.874011479849981e-06,
860
+ "loss": 0.7478,
861
+ "step": 1160
862
+ },
863
+ {
864
+ "epoch": 0.7340025094102886,
865
+ "grad_norm": 1.666491985321045,
866
+ "learning_rate": 9.871800995918264e-06,
867
+ "loss": 0.7733,
868
+ "step": 1170
869
+ },
870
+ {
871
+ "epoch": 0.740276035131744,
872
+ "grad_norm": 1.9356104135513306,
873
+ "learning_rate": 9.86957154042769e-06,
874
+ "loss": 0.7972,
875
+ "step": 1180
876
+ },
877
+ {
878
+ "epoch": 0.7465495608531995,
879
+ "grad_norm": 1.471297264099121,
880
+ "learning_rate": 9.867323122060108e-06,
881
+ "loss": 0.7978,
882
+ "step": 1190
883
+ },
884
+ {
885
+ "epoch": 0.7528230865746549,
886
+ "grad_norm": 1.5875897407531738,
887
+ "learning_rate": 9.865055749571215e-06,
888
+ "loss": 0.7192,
889
+ "step": 1200
890
+ },
891
+ {
892
+ "epoch": 0.7528230865746549,
893
+ "eval_loss": 0.7873027324676514,
894
+ "eval_runtime": 45.6574,
895
+ "eval_samples_per_second": 98.56,
896
+ "eval_steps_per_second": 6.176,
897
+ "step": 1200
898
+ },
899
+ {
900
+ "epoch": 0.7590966122961104,
901
+ "grad_norm": 1.8673670291900635,
902
+ "learning_rate": 9.862769431790513e-06,
903
+ "loss": 0.7218,
904
+ "step": 1210
905
+ },
906
+ {
907
+ "epoch": 0.7653701380175659,
908
+ "grad_norm": 2.313070058822632,
909
+ "learning_rate": 9.860464177621286e-06,
910
+ "loss": 0.7912,
911
+ "step": 1220
912
+ },
913
+ {
914
+ "epoch": 0.7716436637390214,
915
+ "grad_norm": 1.7745637893676758,
916
+ "learning_rate": 9.858139996040554e-06,
917
+ "loss": 0.7481,
918
+ "step": 1230
919
+ },
920
+ {
921
+ "epoch": 0.7779171894604768,
922
+ "grad_norm": 2.0406556129455566,
923
+ "learning_rate": 9.855796896099044e-06,
924
+ "loss": 0.7995,
925
+ "step": 1240
926
+ },
927
+ {
928
+ "epoch": 0.7841907151819323,
929
+ "grad_norm": 1.914360761642456,
930
+ "learning_rate": 9.85343488692116e-06,
931
+ "loss": 0.7552,
932
+ "step": 1250
933
+ },
934
+ {
935
+ "epoch": 0.7904642409033877,
936
+ "grad_norm": 2.313922166824341,
937
+ "learning_rate": 9.851053977704931e-06,
938
+ "loss": 0.7911,
939
+ "step": 1260
940
+ },
941
+ {
942
+ "epoch": 0.7967377666248432,
943
+ "grad_norm": 1.643646001815796,
944
+ "learning_rate": 9.848654177721999e-06,
945
+ "loss": 0.7974,
946
+ "step": 1270
947
+ },
948
+ {
949
+ "epoch": 0.8030112923462986,
950
+ "grad_norm": 1.9029057025909424,
951
+ "learning_rate": 9.846235496317556e-06,
952
+ "loss": 0.7871,
953
+ "step": 1280
954
+ },
955
+ {
956
+ "epoch": 0.8092848180677541,
957
+ "grad_norm": 2.5715513229370117,
958
+ "learning_rate": 9.843797942910328e-06,
959
+ "loss": 0.7374,
960
+ "step": 1290
961
+ },
962
+ {
963
+ "epoch": 0.8155583437892095,
964
+ "grad_norm": 1.688032865524292,
965
+ "learning_rate": 9.841341526992536e-06,
966
+ "loss": 0.7798,
967
+ "step": 1300
968
+ },
969
+ {
970
+ "epoch": 0.821831869510665,
971
+ "grad_norm": 2.190502166748047,
972
+ "learning_rate": 9.838866258129847e-06,
973
+ "loss": 0.8417,
974
+ "step": 1310
975
+ },
976
+ {
977
+ "epoch": 0.8281053952321205,
978
+ "grad_norm": 1.6088762283325195,
979
+ "learning_rate": 9.836372145961346e-06,
980
+ "loss": 0.8111,
981
+ "step": 1320
982
+ },
983
+ {
984
+ "epoch": 0.8343789209535759,
985
+ "grad_norm": 1.9306305646896362,
986
+ "learning_rate": 9.833859200199498e-06,
987
+ "loss": 0.7555,
988
+ "step": 1330
989
+ },
990
+ {
991
+ "epoch": 0.8406524466750314,
992
+ "grad_norm": 1.9311903715133667,
993
+ "learning_rate": 9.83132743063011e-06,
994
+ "loss": 0.8246,
995
+ "step": 1340
996
+ },
997
+ {
998
+ "epoch": 0.8469259723964868,
999
+ "grad_norm": 2.0140748023986816,
1000
+ "learning_rate": 9.82877684711229e-06,
1001
+ "loss": 0.8184,
1002
+ "step": 1350
1003
+ },
1004
+ {
1005
+ "epoch": 0.8531994981179423,
1006
+ "grad_norm": 1.8832004070281982,
1007
+ "learning_rate": 9.826207459578412e-06,
1008
+ "loss": 0.7929,
1009
+ "step": 1360
1010
+ },
1011
+ {
1012
+ "epoch": 0.8594730238393977,
1013
+ "grad_norm": 1.7988462448120117,
1014
+ "learning_rate": 9.823619278034073e-06,
1015
+ "loss": 0.8003,
1016
+ "step": 1370
1017
+ },
1018
+ {
1019
+ "epoch": 0.8657465495608532,
1020
+ "grad_norm": 1.436877727508545,
1021
+ "learning_rate": 9.821012312558059e-06,
1022
+ "loss": 0.813,
1023
+ "step": 1380
1024
+ },
1025
+ {
1026
+ "epoch": 0.8720200752823086,
1027
+ "grad_norm": 2.726731300354004,
1028
+ "learning_rate": 9.818386573302305e-06,
1029
+ "loss": 0.7654,
1030
+ "step": 1390
1031
+ },
1032
+ {
1033
+ "epoch": 0.8782936010037641,
1034
+ "grad_norm": 1.972068190574646,
1035
+ "learning_rate": 9.815742070491852e-06,
1036
+ "loss": 0.7894,
1037
+ "step": 1400
1038
+ },
1039
+ {
1040
+ "epoch": 0.8782936010037641,
1041
+ "eval_loss": 0.7845989465713501,
1042
+ "eval_runtime": 45.2794,
1043
+ "eval_samples_per_second": 99.383,
1044
+ "eval_steps_per_second": 6.228,
1045
+ "step": 1400
1046
+ },
1047
+ {
1048
+ "epoch": 0.8845671267252195,
1049
+ "grad_norm": 2.338796854019165,
1050
+ "learning_rate": 9.81307881442481e-06,
1051
+ "loss": 0.7384,
1052
+ "step": 1410
1053
+ },
1054
+ {
1055
+ "epoch": 0.890840652446675,
1056
+ "grad_norm": 2.0138137340545654,
1057
+ "learning_rate": 9.810396815472316e-06,
1058
+ "loss": 0.773,
1059
+ "step": 1420
1060
+ },
1061
+ {
1062
+ "epoch": 0.8971141781681304,
1063
+ "grad_norm": 1.6011505126953125,
1064
+ "learning_rate": 9.807696084078494e-06,
1065
+ "loss": 0.7837,
1066
+ "step": 1430
1067
+ },
1068
+ {
1069
+ "epoch": 0.903387703889586,
1070
+ "grad_norm": 1.828351616859436,
1071
+ "learning_rate": 9.804976630760419e-06,
1072
+ "loss": 0.8331,
1073
+ "step": 1440
1074
+ },
1075
+ {
1076
+ "epoch": 0.9096612296110415,
1077
+ "grad_norm": 2.2111215591430664,
1078
+ "learning_rate": 9.802238466108068e-06,
1079
+ "loss": 0.8119,
1080
+ "step": 1450
1081
+ },
1082
+ {
1083
+ "epoch": 0.9159347553324969,
1084
+ "grad_norm": 1.771132230758667,
1085
+ "learning_rate": 9.799481600784286e-06,
1086
+ "loss": 0.8287,
1087
+ "step": 1460
1088
+ },
1089
+ {
1090
+ "epoch": 0.9222082810539524,
1091
+ "grad_norm": 2.1488120555877686,
1092
+ "learning_rate": 9.796706045524738e-06,
1093
+ "loss": 0.7751,
1094
+ "step": 1470
1095
+ },
1096
+ {
1097
+ "epoch": 0.9284818067754078,
1098
+ "grad_norm": 3.2007815837860107,
1099
+ "learning_rate": 9.793911811137874e-06,
1100
+ "loss": 0.7264,
1101
+ "step": 1480
1102
+ },
1103
+ {
1104
+ "epoch": 0.9347553324968633,
1105
+ "grad_norm": 1.445391058921814,
1106
+ "learning_rate": 9.791098908504884e-06,
1107
+ "loss": 0.8592,
1108
+ "step": 1490
1109
+ },
1110
+ {
1111
+ "epoch": 0.9410288582183187,
1112
+ "grad_norm": 1.7853851318359375,
1113
+ "learning_rate": 9.788267348579649e-06,
1114
+ "loss": 0.7702,
1115
+ "step": 1500
1116
+ },
1117
+ {
1118
+ "epoch": 0.9473023839397742,
1119
+ "grad_norm": 1.9368916749954224,
1120
+ "learning_rate": 9.78541714238871e-06,
1121
+ "loss": 0.7685,
1122
+ "step": 1510
1123
+ },
1124
+ {
1125
+ "epoch": 0.9535759096612296,
1126
+ "grad_norm": 1.652959942817688,
1127
+ "learning_rate": 9.782548301031218e-06,
1128
+ "loss": 0.7715,
1129
+ "step": 1520
1130
+ },
1131
+ {
1132
+ "epoch": 0.9598494353826851,
1133
+ "grad_norm": 1.6748148202896118,
1134
+ "learning_rate": 9.77966083567889e-06,
1135
+ "loss": 0.7347,
1136
+ "step": 1530
1137
+ },
1138
+ {
1139
+ "epoch": 0.9661229611041405,
1140
+ "grad_norm": 1.88899827003479,
1141
+ "learning_rate": 9.776754757575975e-06,
1142
+ "loss": 0.7937,
1143
+ "step": 1540
1144
+ },
1145
+ {
1146
+ "epoch": 0.972396486825596,
1147
+ "grad_norm": 1.7232940196990967,
1148
+ "learning_rate": 9.773830078039193e-06,
1149
+ "loss": 0.8053,
1150
+ "step": 1550
1151
+ },
1152
+ {
1153
+ "epoch": 0.9786700125470514,
1154
+ "grad_norm": 1.5971760749816895,
1155
+ "learning_rate": 9.77088680845771e-06,
1156
+ "loss": 0.814,
1157
+ "step": 1560
1158
+ },
1159
+ {
1160
+ "epoch": 0.9849435382685069,
1161
+ "grad_norm": 1.5581893920898438,
1162
+ "learning_rate": 9.767924960293076e-06,
1163
+ "loss": 0.8395,
1164
+ "step": 1570
1165
+ },
1166
+ {
1167
+ "epoch": 0.9912170639899623,
1168
+ "grad_norm": 1.6082885265350342,
1169
+ "learning_rate": 9.764944545079197e-06,
1170
+ "loss": 0.762,
1171
+ "step": 1580
1172
+ },
1173
+ {
1174
+ "epoch": 0.9974905897114178,
1175
+ "grad_norm": 1.701948881149292,
1176
+ "learning_rate": 9.761945574422276e-06,
1177
+ "loss": 0.8235,
1178
+ "step": 1590
1179
+ },
1180
+ {
1181
+ "epoch": 1.0037641154328734,
1182
+ "grad_norm": 2.168287754058838,
1183
+ "learning_rate": 9.758928060000779e-06,
1184
+ "loss": 0.7502,
1185
+ "step": 1600
1186
+ },
1187
+ {
1188
+ "epoch": 1.0037641154328734,
1189
+ "eval_loss": 0.7799575328826904,
1190
+ "eval_runtime": 45.6346,
1191
+ "eval_samples_per_second": 98.609,
1192
+ "eval_steps_per_second": 6.18,
1193
+ "step": 1600
1194
+ },
1195
+ {
1196
+ "epoch": 1.0100376411543288,
1197
+ "grad_norm": 2.037853240966797,
1198
+ "learning_rate": 9.755892013565377e-06,
1199
+ "loss": 0.7589,
1200
+ "step": 1610
1201
+ },
1202
+ {
1203
+ "epoch": 1.0163111668757843,
1204
+ "grad_norm": 1.8144699335098267,
1205
+ "learning_rate": 9.752837446938915e-06,
1206
+ "loss": 0.7843,
1207
+ "step": 1620
1208
+ },
1209
+ {
1210
+ "epoch": 1.0225846925972397,
1211
+ "grad_norm": 1.831265926361084,
1212
+ "learning_rate": 9.749764372016355e-06,
1213
+ "loss": 0.821,
1214
+ "step": 1630
1215
+ },
1216
+ {
1217
+ "epoch": 1.0288582183186952,
1218
+ "grad_norm": 2.159816265106201,
1219
+ "learning_rate": 9.746672800764734e-06,
1220
+ "loss": 0.7858,
1221
+ "step": 1640
1222
+ },
1223
+ {
1224
+ "epoch": 1.0351317440401506,
1225
+ "grad_norm": 1.954287052154541,
1226
+ "learning_rate": 9.743562745223118e-06,
1227
+ "loss": 0.754,
1228
+ "step": 1650
1229
+ },
1230
+ {
1231
+ "epoch": 1.041405269761606,
1232
+ "grad_norm": 1.830365777015686,
1233
+ "learning_rate": 9.740434217502549e-06,
1234
+ "loss": 0.7556,
1235
+ "step": 1660
1236
+ },
1237
+ {
1238
+ "epoch": 1.0476787954830615,
1239
+ "grad_norm": 1.762190580368042,
1240
+ "learning_rate": 9.737287229786007e-06,
1241
+ "loss": 0.7852,
1242
+ "step": 1670
1243
+ },
1244
+ {
1245
+ "epoch": 1.053952321204517,
1246
+ "grad_norm": 2.2943947315216064,
1247
+ "learning_rate": 9.734121794328358e-06,
1248
+ "loss": 0.7725,
1249
+ "step": 1680
1250
+ },
1251
+ {
1252
+ "epoch": 1.0602258469259724,
1253
+ "grad_norm": 1.9676669836044312,
1254
+ "learning_rate": 9.730937923456303e-06,
1255
+ "loss": 0.8078,
1256
+ "step": 1690
1257
+ },
1258
+ {
1259
+ "epoch": 1.066499372647428,
1260
+ "grad_norm": 1.728384256362915,
1261
+ "learning_rate": 9.727735629568335e-06,
1262
+ "loss": 0.7405,
1263
+ "step": 1700
1264
+ },
1265
+ {
1266
+ "epoch": 1.0727728983688833,
1267
+ "grad_norm": 2.186436891555786,
1268
+ "learning_rate": 9.724514925134696e-06,
1269
+ "loss": 0.7875,
1270
+ "step": 1710
1271
+ },
1272
+ {
1273
+ "epoch": 1.0790464240903388,
1274
+ "grad_norm": 1.8279480934143066,
1275
+ "learning_rate": 9.721275822697307e-06,
1276
+ "loss": 0.7385,
1277
+ "step": 1720
1278
+ },
1279
+ {
1280
+ "epoch": 1.0853199498117942,
1281
+ "grad_norm": 2.1815567016601562,
1282
+ "learning_rate": 9.718018334869748e-06,
1283
+ "loss": 0.7278,
1284
+ "step": 1730
1285
+ },
1286
+ {
1287
+ "epoch": 1.0915934755332497,
1288
+ "grad_norm": 2.4241669178009033,
1289
+ "learning_rate": 9.714742474337187e-06,
1290
+ "loss": 0.7538,
1291
+ "step": 1740
1292
+ },
1293
+ {
1294
+ "epoch": 1.0978670012547052,
1295
+ "grad_norm": 2.237373113632202,
1296
+ "learning_rate": 9.711448253856336e-06,
1297
+ "loss": 0.8285,
1298
+ "step": 1750
1299
+ },
1300
+ {
1301
+ "epoch": 1.1041405269761606,
1302
+ "grad_norm": 2.1564557552337646,
1303
+ "learning_rate": 9.708135686255415e-06,
1304
+ "loss": 0.774,
1305
+ "step": 1760
1306
+ },
1307
+ {
1308
+ "epoch": 1.110414052697616,
1309
+ "grad_norm": 1.9159622192382812,
1310
+ "learning_rate": 9.704804784434077e-06,
1311
+ "loss": 0.7162,
1312
+ "step": 1770
1313
+ },
1314
+ {
1315
+ "epoch": 1.1166875784190715,
1316
+ "grad_norm": 2.0551164150238037,
1317
+ "learning_rate": 9.701455561363378e-06,
1318
+ "loss": 0.75,
1319
+ "step": 1780
1320
+ },
1321
+ {
1322
+ "epoch": 1.122961104140527,
1323
+ "grad_norm": 2.0797348022460938,
1324
+ "learning_rate": 9.698088030085721e-06,
1325
+ "loss": 0.7717,
1326
+ "step": 1790
1327
+ },
1328
+ {
1329
+ "epoch": 1.1292346298619824,
1330
+ "grad_norm": 2.1254172325134277,
1331
+ "learning_rate": 9.694702203714801e-06,
1332
+ "loss": 0.7253,
1333
+ "step": 1800
1334
+ },
1335
+ {
1336
+ "epoch": 1.1292346298619824,
1337
+ "eval_loss": 0.7777413129806519,
1338
+ "eval_runtime": 45.1352,
1339
+ "eval_samples_per_second": 99.7,
1340
+ "eval_steps_per_second": 6.248,
1341
+ "step": 1800
1342
+ },
1343
+ {
1344
+ "epoch": 1.1355081555834379,
1345
+ "grad_norm": 2.4966630935668945,
1346
+ "learning_rate": 9.691298095435559e-06,
1347
+ "loss": 0.7075,
1348
+ "step": 1810
1349
+ },
1350
+ {
1351
+ "epoch": 1.1417816813048933,
1352
+ "grad_norm": 2.2756330966949463,
1353
+ "learning_rate": 9.687875718504126e-06,
1354
+ "loss": 0.795,
1355
+ "step": 1820
1356
+ },
1357
+ {
1358
+ "epoch": 1.1480552070263488,
1359
+ "grad_norm": 1.8628673553466797,
1360
+ "learning_rate": 9.684435086247777e-06,
1361
+ "loss": 0.7423,
1362
+ "step": 1830
1363
+ },
1364
+ {
1365
+ "epoch": 1.1543287327478042,
1366
+ "grad_norm": 3.1080915927886963,
1367
+ "learning_rate": 9.680976212064875e-06,
1368
+ "loss": 0.7379,
1369
+ "step": 1840
1370
+ },
1371
+ {
1372
+ "epoch": 1.1606022584692597,
1373
+ "grad_norm": 2.0719306468963623,
1374
+ "learning_rate": 9.677499109424818e-06,
1375
+ "loss": 0.7817,
1376
+ "step": 1850
1377
+ },
1378
+ {
1379
+ "epoch": 1.1668757841907151,
1380
+ "grad_norm": 1.9272258281707764,
1381
+ "learning_rate": 9.674003791867993e-06,
1382
+ "loss": 0.7837,
1383
+ "step": 1860
1384
+ },
1385
+ {
1386
+ "epoch": 1.1731493099121706,
1387
+ "grad_norm": 2.963129997253418,
1388
+ "learning_rate": 9.670490273005713e-06,
1389
+ "loss": 0.7685,
1390
+ "step": 1870
1391
+ },
1392
+ {
1393
+ "epoch": 1.179422835633626,
1394
+ "grad_norm": 2.2094898223876953,
1395
+ "learning_rate": 9.666958566520175e-06,
1396
+ "loss": 0.7627,
1397
+ "step": 1880
1398
+ },
1399
+ {
1400
+ "epoch": 1.1856963613550815,
1401
+ "grad_norm": 2.6565909385681152,
1402
+ "learning_rate": 9.663408686164399e-06,
1403
+ "loss": 0.7502,
1404
+ "step": 1890
1405
+ },
1406
+ {
1407
+ "epoch": 1.191969887076537,
1408
+ "grad_norm": 2.05069899559021,
1409
+ "learning_rate": 9.659840645762176e-06,
1410
+ "loss": 0.7264,
1411
+ "step": 1900
1412
+ },
1413
+ {
1414
+ "epoch": 1.1982434127979924,
1415
+ "grad_norm": 2.0309925079345703,
1416
+ "learning_rate": 9.656254459208015e-06,
1417
+ "loss": 0.7019,
1418
+ "step": 1910
1419
+ },
1420
+ {
1421
+ "epoch": 1.2045169385194479,
1422
+ "grad_norm": 1.833817720413208,
1423
+ "learning_rate": 9.652650140467094e-06,
1424
+ "loss": 0.7528,
1425
+ "step": 1920
1426
+ },
1427
+ {
1428
+ "epoch": 1.2107904642409033,
1429
+ "grad_norm": 2.237750291824341,
1430
+ "learning_rate": 9.649027703575193e-06,
1431
+ "loss": 0.7279,
1432
+ "step": 1930
1433
+ },
1434
+ {
1435
+ "epoch": 1.2170639899623588,
1436
+ "grad_norm": 2.1970582008361816,
1437
+ "learning_rate": 9.645387162638652e-06,
1438
+ "loss": 0.6951,
1439
+ "step": 1940
1440
+ },
1441
+ {
1442
+ "epoch": 1.2233375156838142,
1443
+ "grad_norm": 2.2934882640838623,
1444
+ "learning_rate": 9.641728531834313e-06,
1445
+ "loss": 0.7355,
1446
+ "step": 1950
1447
+ },
1448
+ {
1449
+ "epoch": 1.2296110414052697,
1450
+ "grad_norm": 2.0726277828216553,
1451
+ "learning_rate": 9.638051825409454e-06,
1452
+ "loss": 0.694,
1453
+ "step": 1960
1454
+ },
1455
+ {
1456
+ "epoch": 1.2358845671267251,
1457
+ "grad_norm": 2.173316478729248,
1458
+ "learning_rate": 9.634357057681749e-06,
1459
+ "loss": 0.7337,
1460
+ "step": 1970
1461
+ },
1462
+ {
1463
+ "epoch": 1.2421580928481806,
1464
+ "grad_norm": 2.1220340728759766,
1465
+ "learning_rate": 9.630644243039207e-06,
1466
+ "loss": 0.71,
1467
+ "step": 1980
1468
+ },
1469
+ {
1470
+ "epoch": 1.248431618569636,
1471
+ "grad_norm": 1.7132002115249634,
1472
+ "learning_rate": 9.62691339594011e-06,
1473
+ "loss": 0.7502,
1474
+ "step": 1990
1475
+ },
1476
+ {
1477
+ "epoch": 1.2547051442910915,
1478
+ "grad_norm": 2.3984744548797607,
1479
+ "learning_rate": 9.623164530912963e-06,
1480
+ "loss": 0.7756,
1481
+ "step": 2000
1482
+ },
1483
+ {
1484
+ "epoch": 1.2547051442910915,
1485
+ "eval_loss": 0.7758031487464905,
1486
+ "eval_runtime": 45.1114,
1487
+ "eval_samples_per_second": 99.753,
1488
+ "eval_steps_per_second": 6.251,
1489
+ "step": 2000
1490
+ },
1491
+ {
1492
+ "epoch": 1.260978670012547,
1493
+ "grad_norm": 2.1603596210479736,
1494
+ "learning_rate": 9.619397662556434e-06,
1495
+ "loss": 0.6734,
1496
+ "step": 2010
1497
+ },
1498
+ {
1499
+ "epoch": 1.2672521957340024,
1500
+ "grad_norm": 2.9604389667510986,
1501
+ "learning_rate": 9.615612805539305e-06,
1502
+ "loss": 0.7128,
1503
+ "step": 2020
1504
+ },
1505
+ {
1506
+ "epoch": 1.2735257214554578,
1507
+ "grad_norm": 1.9457265138626099,
1508
+ "learning_rate": 9.6118099746004e-06,
1509
+ "loss": 0.732,
1510
+ "step": 2030
1511
+ },
1512
+ {
1513
+ "epoch": 1.2797992471769133,
1514
+ "grad_norm": 2.6136579513549805,
1515
+ "learning_rate": 9.607989184548544e-06,
1516
+ "loss": 0.7994,
1517
+ "step": 2040
1518
+ },
1519
+ {
1520
+ "epoch": 1.286072772898369,
1521
+ "grad_norm": 2.0028586387634277,
1522
+ "learning_rate": 9.604150450262488e-06,
1523
+ "loss": 0.7655,
1524
+ "step": 2050
1525
+ },
1526
+ {
1527
+ "epoch": 1.2923462986198244,
1528
+ "grad_norm": 2.499894857406616,
1529
+ "learning_rate": 9.600293786690873e-06,
1530
+ "loss": 0.7105,
1531
+ "step": 2060
1532
+ },
1533
+ {
1534
+ "epoch": 1.2986198243412799,
1535
+ "grad_norm": 2.2922027111053467,
1536
+ "learning_rate": 9.596419208852152e-06,
1537
+ "loss": 0.785,
1538
+ "step": 2070
1539
+ },
1540
+ {
1541
+ "epoch": 1.3048933500627353,
1542
+ "grad_norm": 2.0866246223449707,
1543
+ "learning_rate": 9.592526731834536e-06,
1544
+ "loss": 0.7303,
1545
+ "step": 2080
1546
+ },
1547
+ {
1548
+ "epoch": 1.3111668757841908,
1549
+ "grad_norm": 2.182152032852173,
1550
+ "learning_rate": 9.588616370795947e-06,
1551
+ "loss": 0.7502,
1552
+ "step": 2090
1553
+ },
1554
+ {
1555
+ "epoch": 1.3174404015056462,
1556
+ "grad_norm": 2.4968299865722656,
1557
+ "learning_rate": 9.584688140963945e-06,
1558
+ "loss": 0.7811,
1559
+ "step": 2100
1560
+ },
1561
+ {
1562
+ "epoch": 1.3237139272271017,
1563
+ "grad_norm": 2.134629011154175,
1564
+ "learning_rate": 9.580742057635672e-06,
1565
+ "loss": 0.752,
1566
+ "step": 2110
1567
+ },
1568
+ {
1569
+ "epoch": 1.3299874529485571,
1570
+ "grad_norm": 2.61116361618042,
1571
+ "learning_rate": 9.576778136177798e-06,
1572
+ "loss": 0.7478,
1573
+ "step": 2120
1574
+ },
1575
+ {
1576
+ "epoch": 1.3362609786700126,
1577
+ "grad_norm": 2.243969440460205,
1578
+ "learning_rate": 9.572796392026455e-06,
1579
+ "loss": 0.7347,
1580
+ "step": 2130
1581
+ },
1582
+ {
1583
+ "epoch": 1.342534504391468,
1584
+ "grad_norm": 2.3413894176483154,
1585
+ "learning_rate": 9.568796840687184e-06,
1586
+ "loss": 0.7028,
1587
+ "step": 2140
1588
+ },
1589
+ {
1590
+ "epoch": 1.3488080301129235,
1591
+ "grad_norm": 1.9159423112869263,
1592
+ "learning_rate": 9.564779497734864e-06,
1593
+ "loss": 0.7647,
1594
+ "step": 2150
1595
+ },
1596
+ {
1597
+ "epoch": 1.355081555834379,
1598
+ "grad_norm": 2.6007165908813477,
1599
+ "learning_rate": 9.56074437881366e-06,
1600
+ "loss": 0.7631,
1601
+ "step": 2160
1602
+ },
1603
+ {
1604
+ "epoch": 1.3613550815558344,
1605
+ "grad_norm": 2.189784526824951,
1606
+ "learning_rate": 9.55669149963696e-06,
1607
+ "loss": 0.763,
1608
+ "step": 2170
1609
+ },
1610
+ {
1611
+ "epoch": 1.3676286072772899,
1612
+ "grad_norm": 2.0012011528015137,
1613
+ "learning_rate": 9.552620875987312e-06,
1614
+ "loss": 0.7417,
1615
+ "step": 2180
1616
+ },
1617
+ {
1618
+ "epoch": 1.3739021329987453,
1619
+ "grad_norm": 2.5534048080444336,
1620
+ "learning_rate": 9.548532523716366e-06,
1621
+ "loss": 0.7542,
1622
+ "step": 2190
1623
+ },
1624
+ {
1625
+ "epoch": 1.3801756587202008,
1626
+ "grad_norm": 3.0403106212615967,
1627
+ "learning_rate": 9.544426458744805e-06,
1628
+ "loss": 0.7199,
1629
+ "step": 2200
1630
+ },
1631
+ {
1632
+ "epoch": 1.3801756587202008,
1633
+ "eval_loss": 0.7733274698257446,
1634
+ "eval_runtime": 45.1455,
1635
+ "eval_samples_per_second": 99.678,
1636
+ "eval_steps_per_second": 6.246,
1637
+ "step": 2200
1638
+ },
1639
+ {
1640
+ "epoch": 1.3864491844416562,
1641
+ "grad_norm": 2.0234525203704834,
1642
+ "learning_rate": 9.540302697062294e-06,
1643
+ "loss": 0.7766,
1644
+ "step": 2210
1645
+ },
1646
+ {
1647
+ "epoch": 1.3927227101631117,
1648
+ "grad_norm": 1.8603047132492065,
1649
+ "learning_rate": 9.536161254727407e-06,
1650
+ "loss": 0.7793,
1651
+ "step": 2220
1652
+ },
1653
+ {
1654
+ "epoch": 1.3989962358845671,
1655
+ "grad_norm": 1.990689992904663,
1656
+ "learning_rate": 9.532002147867575e-06,
1657
+ "loss": 0.7674,
1658
+ "step": 2230
1659
+ },
1660
+ {
1661
+ "epoch": 1.4052697616060226,
1662
+ "grad_norm": 2.2822988033294678,
1663
+ "learning_rate": 9.527825392679012e-06,
1664
+ "loss": 0.7776,
1665
+ "step": 2240
1666
+ },
1667
+ {
1668
+ "epoch": 1.411543287327478,
1669
+ "grad_norm": 2.289533853530884,
1670
+ "learning_rate": 9.523631005426658e-06,
1671
+ "loss": 0.7368,
1672
+ "step": 2250
1673
+ },
1674
+ {
1675
+ "epoch": 1.4178168130489335,
1676
+ "grad_norm": 2.385829210281372,
1677
+ "learning_rate": 9.51941900244412e-06,
1678
+ "loss": 0.7459,
1679
+ "step": 2260
1680
+ },
1681
+ {
1682
+ "epoch": 1.424090338770389,
1683
+ "grad_norm": 2.0685439109802246,
1684
+ "learning_rate": 9.515189400133594e-06,
1685
+ "loss": 0.7139,
1686
+ "step": 2270
1687
+ },
1688
+ {
1689
+ "epoch": 1.4303638644918444,
1690
+ "grad_norm": 1.9456595182418823,
1691
+ "learning_rate": 9.510942214965819e-06,
1692
+ "loss": 0.7625,
1693
+ "step": 2280
1694
+ },
1695
+ {
1696
+ "epoch": 1.4366373902132998,
1697
+ "grad_norm": 3.2623114585876465,
1698
+ "learning_rate": 9.506677463480003e-06,
1699
+ "loss": 0.7779,
1700
+ "step": 2290
1701
+ },
1702
+ {
1703
+ "epoch": 1.4429109159347553,
1704
+ "grad_norm": 1.9773170948028564,
1705
+ "learning_rate": 9.50239516228376e-06,
1706
+ "loss": 0.7231,
1707
+ "step": 2300
1708
+ },
1709
+ {
1710
+ "epoch": 1.4491844416562107,
1711
+ "grad_norm": 2.506037950515747,
1712
+ "learning_rate": 9.49809532805304e-06,
1713
+ "loss": 0.7585,
1714
+ "step": 2310
1715
+ },
1716
+ {
1717
+ "epoch": 1.4554579673776662,
1718
+ "grad_norm": 2.211965322494507,
1719
+ "learning_rate": 9.493777977532072e-06,
1720
+ "loss": 0.7364,
1721
+ "step": 2320
1722
+ },
1723
+ {
1724
+ "epoch": 1.4617314930991216,
1725
+ "grad_norm": 1.7759398221969604,
1726
+ "learning_rate": 9.489443127533304e-06,
1727
+ "loss": 0.7575,
1728
+ "step": 2330
1729
+ },
1730
+ {
1731
+ "epoch": 1.468005018820577,
1732
+ "grad_norm": 2.7802937030792236,
1733
+ "learning_rate": 9.485090794937319e-06,
1734
+ "loss": 0.7191,
1735
+ "step": 2340
1736
+ },
1737
+ {
1738
+ "epoch": 1.4742785445420326,
1739
+ "grad_norm": 2.185365915298462,
1740
+ "learning_rate": 9.480720996692783e-06,
1741
+ "loss": 0.701,
1742
+ "step": 2350
1743
+ },
1744
+ {
1745
+ "epoch": 1.480552070263488,
1746
+ "grad_norm": 1.9464114904403687,
1747
+ "learning_rate": 9.476333749816382e-06,
1748
+ "loss": 0.6689,
1749
+ "step": 2360
1750
+ },
1751
+ {
1752
+ "epoch": 1.4868255959849435,
1753
+ "grad_norm": 2.6900150775909424,
1754
+ "learning_rate": 9.47192907139274e-06,
1755
+ "loss": 0.7416,
1756
+ "step": 2370
1757
+ },
1758
+ {
1759
+ "epoch": 1.4930991217063991,
1760
+ "grad_norm": 2.480715751647949,
1761
+ "learning_rate": 9.46750697857437e-06,
1762
+ "loss": 0.7309,
1763
+ "step": 2380
1764
+ },
1765
+ {
1766
+ "epoch": 1.4993726474278546,
1767
+ "grad_norm": 2.2300286293029785,
1768
+ "learning_rate": 9.463067488581598e-06,
1769
+ "loss": 0.7365,
1770
+ "step": 2390
1771
+ },
1772
+ {
1773
+ "epoch": 1.50564617314931,
1774
+ "grad_norm": 2.553088665008545,
1775
+ "learning_rate": 9.45861061870249e-06,
1776
+ "loss": 0.711,
1777
+ "step": 2400
1778
+ },
1779
+ {
1780
+ "epoch": 1.50564617314931,
1781
+ "eval_loss": 0.7718048095703125,
1782
+ "eval_runtime": 44.9202,
1783
+ "eval_samples_per_second": 100.178,
1784
+ "eval_steps_per_second": 6.278,
1785
+ "step": 2400
1786
+ },
1787
+ {
1788
+ "epoch": 1.5119196988707655,
1789
+ "grad_norm": 2.293585777282715,
1790
+ "learning_rate": 9.454136386292804e-06,
1791
+ "loss": 0.7494,
1792
+ "step": 2410
1793
+ },
1794
+ {
1795
+ "epoch": 1.518193224592221,
1796
+ "grad_norm": 2.3375558853149414,
1797
+ "learning_rate": 9.449644808775902e-06,
1798
+ "loss": 0.6885,
1799
+ "step": 2420
1800
+ },
1801
+ {
1802
+ "epoch": 1.5244667503136764,
1803
+ "grad_norm": 2.7316160202026367,
1804
+ "learning_rate": 9.445135903642693e-06,
1805
+ "loss": 0.766,
1806
+ "step": 2430
1807
+ },
1808
+ {
1809
+ "epoch": 1.5307402760351319,
1810
+ "grad_norm": 2.812262773513794,
1811
+ "learning_rate": 9.440609688451561e-06,
1812
+ "loss": 0.7303,
1813
+ "step": 2440
1814
+ },
1815
+ {
1816
+ "epoch": 1.5370138017565873,
1817
+ "grad_norm": 2.25638747215271,
1818
+ "learning_rate": 9.4360661808283e-06,
1819
+ "loss": 0.7169,
1820
+ "step": 2450
1821
+ },
1822
+ {
1823
+ "epoch": 1.5432873274780428,
1824
+ "grad_norm": 2.212007522583008,
1825
+ "learning_rate": 9.431505398466045e-06,
1826
+ "loss": 0.6865,
1827
+ "step": 2460
1828
+ },
1829
+ {
1830
+ "epoch": 1.5495608531994982,
1831
+ "grad_norm": 2.3759188652038574,
1832
+ "learning_rate": 9.426927359125195e-06,
1833
+ "loss": 0.6798,
1834
+ "step": 2470
1835
+ },
1836
+ {
1837
+ "epoch": 1.5558343789209537,
1838
+ "grad_norm": 2.6514952182769775,
1839
+ "learning_rate": 9.422332080633361e-06,
1840
+ "loss": 0.7584,
1841
+ "step": 2480
1842
+ },
1843
+ {
1844
+ "epoch": 1.5621079046424091,
1845
+ "grad_norm": 2.413926124572754,
1846
+ "learning_rate": 9.417719580885275e-06,
1847
+ "loss": 0.7365,
1848
+ "step": 2490
1849
+ },
1850
+ {
1851
+ "epoch": 1.5683814303638646,
1852
+ "grad_norm": 2.4188032150268555,
1853
+ "learning_rate": 9.413089877842735e-06,
1854
+ "loss": 0.7524,
1855
+ "step": 2500
1856
+ },
1857
+ {
1858
+ "epoch": 1.57465495608532,
1859
+ "grad_norm": 2.581517457962036,
1860
+ "learning_rate": 9.408442989534536e-06,
1861
+ "loss": 0.8129,
1862
+ "step": 2510
1863
+ },
1864
+ {
1865
+ "epoch": 1.5809284818067755,
1866
+ "grad_norm": 3.174863338470459,
1867
+ "learning_rate": 9.403778934056392e-06,
1868
+ "loss": 0.6973,
1869
+ "step": 2520
1870
+ },
1871
+ {
1872
+ "epoch": 1.587202007528231,
1873
+ "grad_norm": 2.4910361766815186,
1874
+ "learning_rate": 9.399097729570865e-06,
1875
+ "loss": 0.7304,
1876
+ "step": 2530
1877
+ },
1878
+ {
1879
+ "epoch": 1.5934755332496864,
1880
+ "grad_norm": 2.1573550701141357,
1881
+ "learning_rate": 9.394399394307303e-06,
1882
+ "loss": 0.756,
1883
+ "step": 2540
1884
+ },
1885
+ {
1886
+ "epoch": 1.5997490589711418,
1887
+ "grad_norm": 2.5557515621185303,
1888
+ "learning_rate": 9.38968394656176e-06,
1889
+ "loss": 0.76,
1890
+ "step": 2550
1891
+ },
1892
+ {
1893
+ "epoch": 1.6060225846925973,
1894
+ "grad_norm": 2.0382895469665527,
1895
+ "learning_rate": 9.384951404696933e-06,
1896
+ "loss": 0.6973,
1897
+ "step": 2560
1898
+ },
1899
+ {
1900
+ "epoch": 1.6122961104140527,
1901
+ "grad_norm": 2.976776123046875,
1902
+ "learning_rate": 9.380201787142085e-06,
1903
+ "loss": 0.7363,
1904
+ "step": 2570
1905
+ },
1906
+ {
1907
+ "epoch": 1.6185696361355082,
1908
+ "grad_norm": 2.2506632804870605,
1909
+ "learning_rate": 9.37543511239297e-06,
1910
+ "loss": 0.7052,
1911
+ "step": 2580
1912
+ },
1913
+ {
1914
+ "epoch": 1.6248431618569636,
1915
+ "grad_norm": 2.309023380279541,
1916
+ "learning_rate": 9.370651399011769e-06,
1917
+ "loss": 0.7451,
1918
+ "step": 2590
1919
+ },
1920
+ {
1921
+ "epoch": 1.631116687578419,
1922
+ "grad_norm": 2.504362106323242,
1923
+ "learning_rate": 9.365850665627016e-06,
1924
+ "loss": 0.7531,
1925
+ "step": 2600
1926
+ },
1927
+ {
1928
+ "epoch": 1.631116687578419,
1929
+ "eval_loss": 0.7694670557975769,
1930
+ "eval_runtime": 45.1088,
1931
+ "eval_samples_per_second": 99.759,
1932
+ "eval_steps_per_second": 6.252,
1933
+ "step": 2600
1934
+ },
1935
+ {
1936
+ "epoch": 1.6373902132998746,
1937
+ "grad_norm": 2.637925624847412,
1938
+ "learning_rate": 9.36103293093352e-06,
1939
+ "loss": 0.7894,
1940
+ "step": 2610
1941
+ },
1942
+ {
1943
+ "epoch": 1.64366373902133,
1944
+ "grad_norm": 2.235577344894409,
1945
+ "learning_rate": 9.356198213692297e-06,
1946
+ "loss": 0.7468,
1947
+ "step": 2620
1948
+ },
1949
+ {
1950
+ "epoch": 1.6499372647427855,
1951
+ "grad_norm": 2.54366135597229,
1952
+ "learning_rate": 9.351346532730499e-06,
1953
+ "loss": 0.7262,
1954
+ "step": 2630
1955
+ },
1956
+ {
1957
+ "epoch": 1.656210790464241,
1958
+ "grad_norm": 2.836397171020508,
1959
+ "learning_rate": 9.346477906941331e-06,
1960
+ "loss": 0.7084,
1961
+ "step": 2640
1962
+ },
1963
+ {
1964
+ "epoch": 1.6624843161856964,
1965
+ "grad_norm": 3.1089181900024414,
1966
+ "learning_rate": 9.341592355283986e-06,
1967
+ "loss": 0.7268,
1968
+ "step": 2650
1969
+ },
1970
+ {
1971
+ "epoch": 1.6687578419071518,
1972
+ "grad_norm": 2.175337076187134,
1973
+ "learning_rate": 9.336689896783575e-06,
1974
+ "loss": 0.7439,
1975
+ "step": 2660
1976
+ },
1977
+ {
1978
+ "epoch": 1.6750313676286073,
1979
+ "grad_norm": 2.851436138153076,
1980
+ "learning_rate": 9.331770550531037e-06,
1981
+ "loss": 0.6658,
1982
+ "step": 2670
1983
+ },
1984
+ {
1985
+ "epoch": 1.6813048933500627,
1986
+ "grad_norm": 2.7541348934173584,
1987
+ "learning_rate": 9.32683433568308e-06,
1988
+ "loss": 0.7483,
1989
+ "step": 2680
1990
+ },
1991
+ {
1992
+ "epoch": 1.6875784190715182,
1993
+ "grad_norm": 2.5150299072265625,
1994
+ "learning_rate": 9.321881271462104e-06,
1995
+ "loss": 0.7562,
1996
+ "step": 2690
1997
+ },
1998
+ {
1999
+ "epoch": 1.6938519447929736,
2000
+ "grad_norm": 2.654846429824829,
2001
+ "learning_rate": 9.316911377156116e-06,
2002
+ "loss": 0.7537,
2003
+ "step": 2700
2004
+ },
2005
+ {
2006
+ "epoch": 1.700125470514429,
2007
+ "grad_norm": 2.382484197616577,
2008
+ "learning_rate": 9.31192467211867e-06,
2009
+ "loss": 0.733,
2010
+ "step": 2710
2011
+ },
2012
+ {
2013
+ "epoch": 1.7063989962358845,
2014
+ "grad_norm": 2.9546570777893066,
2015
+ "learning_rate": 9.306921175768776e-06,
2016
+ "loss": 0.7436,
2017
+ "step": 2720
2018
+ },
2019
+ {
2020
+ "epoch": 1.71267252195734,
2021
+ "grad_norm": 2.273836612701416,
2022
+ "learning_rate": 9.301900907590836e-06,
2023
+ "loss": 0.7245,
2024
+ "step": 2730
2025
+ },
2026
+ {
2027
+ "epoch": 1.7189460476787954,
2028
+ "grad_norm": 3.2552435398101807,
2029
+ "learning_rate": 9.296863887134561e-06,
2030
+ "loss": 0.7193,
2031
+ "step": 2740
2032
+ },
2033
+ {
2034
+ "epoch": 1.725219573400251,
2035
+ "grad_norm": 2.515960931777954,
2036
+ "learning_rate": 9.291810134014904e-06,
2037
+ "loss": 0.7343,
2038
+ "step": 2750
2039
+ },
2040
+ {
2041
+ "epoch": 1.7314930991217063,
2042
+ "grad_norm": 2.6132092475891113,
2043
+ "learning_rate": 9.286739667911973e-06,
2044
+ "loss": 0.727,
2045
+ "step": 2760
2046
+ },
2047
+ {
2048
+ "epoch": 1.7377666248431618,
2049
+ "grad_norm": 3.021728754043579,
2050
+ "learning_rate": 9.281652508570957e-06,
2051
+ "loss": 0.7158,
2052
+ "step": 2770
2053
+ },
2054
+ {
2055
+ "epoch": 1.7440401505646173,
2056
+ "grad_norm": 2.9270167350769043,
2057
+ "learning_rate": 9.27654867580206e-06,
2058
+ "loss": 0.7413,
2059
+ "step": 2780
2060
+ },
2061
+ {
2062
+ "epoch": 1.7503136762860727,
2063
+ "grad_norm": 2.496297597885132,
2064
+ "learning_rate": 9.271428189480405e-06,
2065
+ "loss": 0.7519,
2066
+ "step": 2790
2067
+ },
2068
+ {
2069
+ "epoch": 1.7565872020075282,
2070
+ "grad_norm": 2.549004077911377,
2071
+ "learning_rate": 9.266291069545972e-06,
2072
+ "loss": 0.7423,
2073
+ "step": 2800
2074
+ },
2075
+ {
2076
+ "epoch": 1.7565872020075282,
2077
+ "eval_loss": 0.7679787278175354,
2078
+ "eval_runtime": 45.1228,
2079
+ "eval_samples_per_second": 99.728,
2080
+ "eval_steps_per_second": 6.25,
2081
+ "step": 2800
2082
+ },
2083
+ {
2084
+ "epoch": 1.7628607277289836,
2085
+ "grad_norm": 2.4474334716796875,
2086
+ "learning_rate": 9.261137336003511e-06,
2087
+ "loss": 0.6846,
2088
+ "step": 2810
2089
+ },
2090
+ {
2091
+ "epoch": 1.769134253450439,
2092
+ "grad_norm": 2.7972934246063232,
2093
+ "learning_rate": 9.255967008922475e-06,
2094
+ "loss": 0.7126,
2095
+ "step": 2820
2096
+ },
2097
+ {
2098
+ "epoch": 1.7754077791718945,
2099
+ "grad_norm": 2.0689656734466553,
2100
+ "learning_rate": 9.250780108436926e-06,
2101
+ "loss": 0.7814,
2102
+ "step": 2830
2103
+ },
2104
+ {
2105
+ "epoch": 1.78168130489335,
2106
+ "grad_norm": 2.0784525871276855,
2107
+ "learning_rate": 9.245576654745471e-06,
2108
+ "loss": 0.7576,
2109
+ "step": 2840
2110
+ },
2111
+ {
2112
+ "epoch": 1.7879548306148054,
2113
+ "grad_norm": 2.5975897312164307,
2114
+ "learning_rate": 9.24035666811118e-06,
2115
+ "loss": 0.6671,
2116
+ "step": 2850
2117
+ },
2118
+ {
2119
+ "epoch": 1.7942283563362609,
2120
+ "grad_norm": 2.8207385540008545,
2121
+ "learning_rate": 9.235120168861495e-06,
2122
+ "loss": 0.7347,
2123
+ "step": 2860
2124
+ },
2125
+ {
2126
+ "epoch": 1.8005018820577163,
2127
+ "grad_norm": 2.60093355178833,
2128
+ "learning_rate": 9.229867177388172e-06,
2129
+ "loss": 0.7472,
2130
+ "step": 2870
2131
+ },
2132
+ {
2133
+ "epoch": 1.8067754077791718,
2134
+ "grad_norm": 2.5314881801605225,
2135
+ "learning_rate": 9.224597714147186e-06,
2136
+ "loss": 0.6843,
2137
+ "step": 2880
2138
+ },
2139
+ {
2140
+ "epoch": 1.8130489335006272,
2141
+ "grad_norm": 2.968095064163208,
2142
+ "learning_rate": 9.219311799658652e-06,
2143
+ "loss": 0.7753,
2144
+ "step": 2890
2145
+ },
2146
+ {
2147
+ "epoch": 1.8193224592220827,
2148
+ "grad_norm": 2.7731547355651855,
2149
+ "learning_rate": 9.214009454506754e-06,
2150
+ "loss": 0.7061,
2151
+ "step": 2900
2152
+ },
2153
+ {
2154
+ "epoch": 1.8255959849435381,
2155
+ "grad_norm": 2.7813992500305176,
2156
+ "learning_rate": 9.208690699339656e-06,
2157
+ "loss": 0.7658,
2158
+ "step": 2910
2159
+ },
2160
+ {
2161
+ "epoch": 1.8318695106649936,
2162
+ "grad_norm": 2.1669435501098633,
2163
+ "learning_rate": 9.203355554869428e-06,
2164
+ "loss": 0.7928,
2165
+ "step": 2920
2166
+ },
2167
+ {
2168
+ "epoch": 1.838143036386449,
2169
+ "grad_norm": 2.3559539318084717,
2170
+ "learning_rate": 9.198004041871962e-06,
2171
+ "loss": 0.7423,
2172
+ "step": 2930
2173
+ },
2174
+ {
2175
+ "epoch": 1.8444165621079045,
2176
+ "grad_norm": 2.534313201904297,
2177
+ "learning_rate": 9.192636181186887e-06,
2178
+ "loss": 0.7108,
2179
+ "step": 2940
2180
+ },
2181
+ {
2182
+ "epoch": 1.85069008782936,
2183
+ "grad_norm": 2.501065254211426,
2184
+ "learning_rate": 9.1872519937175e-06,
2185
+ "loss": 0.7472,
2186
+ "step": 2950
2187
+ },
2188
+ {
2189
+ "epoch": 1.8569636135508154,
2190
+ "grad_norm": 2.726268768310547,
2191
+ "learning_rate": 9.181851500430672e-06,
2192
+ "loss": 0.8224,
2193
+ "step": 2960
2194
+ },
2195
+ {
2196
+ "epoch": 1.8632371392722709,
2197
+ "grad_norm": 2.264862537384033,
2198
+ "learning_rate": 9.176434722356772e-06,
2199
+ "loss": 0.7494,
2200
+ "step": 2970
2201
+ },
2202
+ {
2203
+ "epoch": 1.8695106649937263,
2204
+ "grad_norm": 2.6671266555786133,
2205
+ "learning_rate": 9.17100168058959e-06,
2206
+ "loss": 0.7526,
2207
+ "step": 2980
2208
+ },
2209
+ {
2210
+ "epoch": 1.875784190715182,
2211
+ "grad_norm": 3.1820783615112305,
2212
+ "learning_rate": 9.165552396286236e-06,
2213
+ "loss": 0.7374,
2214
+ "step": 2990
2215
+ },
2216
+ {
2217
+ "epoch": 1.8820577164366374,
2218
+ "grad_norm": 2.4421300888061523,
2219
+ "learning_rate": 9.160086890667086e-06,
2220
+ "loss": 0.6911,
2221
+ "step": 3000
2222
+ },
2223
+ {
2224
+ "epoch": 1.8820577164366374,
2225
+ "eval_loss": 0.766462504863739,
2226
+ "eval_runtime": 45.1509,
2227
+ "eval_samples_per_second": 99.666,
2228
+ "eval_steps_per_second": 6.246,
2229
+ "step": 3000
2230
+ },
2231
+ {
2232
+ "epoch": 1.888331242158093,
2233
+ "grad_norm": 3.058067798614502,
2234
+ "learning_rate": 9.154605185015678e-06,
2235
+ "loss": 0.6797,
2236
+ "step": 3010
2237
+ },
2238
+ {
2239
+ "epoch": 1.8946047678795483,
2240
+ "grad_norm": 2.4988503456115723,
2241
+ "learning_rate": 9.14910730067863e-06,
2242
+ "loss": 0.6985,
2243
+ "step": 3020
2244
+ },
2245
+ {
2246
+ "epoch": 1.9008782936010038,
2247
+ "grad_norm": 3.2158496379852295,
2248
+ "learning_rate": 9.143593259065573e-06,
2249
+ "loss": 0.8027,
2250
+ "step": 3030
2251
+ },
2252
+ {
2253
+ "epoch": 1.9071518193224593,
2254
+ "grad_norm": 2.1855030059814453,
2255
+ "learning_rate": 9.138063081649052e-06,
2256
+ "loss": 0.7483,
2257
+ "step": 3040
2258
+ },
2259
+ {
2260
+ "epoch": 1.9134253450439147,
2261
+ "grad_norm": 2.3332061767578125,
2262
+ "learning_rate": 9.132516789964443e-06,
2263
+ "loss": 0.7019,
2264
+ "step": 3050
2265
+ },
2266
+ {
2267
+ "epoch": 1.9196988707653702,
2268
+ "grad_norm": 2.6408863067626953,
2269
+ "learning_rate": 9.126954405609882e-06,
2270
+ "loss": 0.6706,
2271
+ "step": 3060
2272
+ },
2273
+ {
2274
+ "epoch": 1.9259723964868256,
2275
+ "grad_norm": 3.055828809738159,
2276
+ "learning_rate": 9.121375950246165e-06,
2277
+ "loss": 0.7579,
2278
+ "step": 3070
2279
+ },
2280
+ {
2281
+ "epoch": 1.932245922208281,
2282
+ "grad_norm": 2.7606723308563232,
2283
+ "learning_rate": 9.115781445596676e-06,
2284
+ "loss": 0.7646,
2285
+ "step": 3080
2286
+ },
2287
+ {
2288
+ "epoch": 1.9385194479297365,
2289
+ "grad_norm": 2.9983773231506348,
2290
+ "learning_rate": 9.110170913447294e-06,
2291
+ "loss": 0.7276,
2292
+ "step": 3090
2293
+ },
2294
+ {
2295
+ "epoch": 1.944792973651192,
2296
+ "grad_norm": 2.4849441051483154,
2297
+ "learning_rate": 9.104544375646314e-06,
2298
+ "loss": 0.7249,
2299
+ "step": 3100
2300
+ },
2301
+ {
2302
+ "epoch": 1.9510664993726474,
2303
+ "grad_norm": 2.335519552230835,
2304
+ "learning_rate": 9.098901854104359e-06,
2305
+ "loss": 0.7584,
2306
+ "step": 3110
2307
+ },
2308
+ {
2309
+ "epoch": 1.9573400250941029,
2310
+ "grad_norm": 3.0842840671539307,
2311
+ "learning_rate": 9.09324337079429e-06,
2312
+ "loss": 0.7649,
2313
+ "step": 3120
2314
+ },
2315
+ {
2316
+ "epoch": 1.9636135508155583,
2317
+ "grad_norm": 2.238382577896118,
2318
+ "learning_rate": 9.08756894775114e-06,
2319
+ "loss": 0.6926,
2320
+ "step": 3130
2321
+ },
2322
+ {
2323
+ "epoch": 1.9698870765370138,
2324
+ "grad_norm": 2.245692014694214,
2325
+ "learning_rate": 9.081878607071996e-06,
2326
+ "loss": 0.7529,
2327
+ "step": 3140
2328
+ },
2329
+ {
2330
+ "epoch": 1.9761606022584692,
2331
+ "grad_norm": 2.9073171615600586,
2332
+ "learning_rate": 9.076172370915944e-06,
2333
+ "loss": 0.7684,
2334
+ "step": 3150
2335
+ },
2336
+ {
2337
+ "epoch": 1.9824341279799247,
2338
+ "grad_norm": 2.327453851699829,
2339
+ "learning_rate": 9.07045026150396e-06,
2340
+ "loss": 0.7569,
2341
+ "step": 3160
2342
+ },
2343
+ {
2344
+ "epoch": 1.9887076537013801,
2345
+ "grad_norm": 2.6128671169281006,
2346
+ "learning_rate": 9.064712301118842e-06,
2347
+ "loss": 0.6668,
2348
+ "step": 3170
2349
+ },
2350
+ {
2351
+ "epoch": 1.9949811794228356,
2352
+ "grad_norm": 2.441389560699463,
2353
+ "learning_rate": 9.058958512105104e-06,
2354
+ "loss": 0.7808,
2355
+ "step": 3180
2356
+ },
2357
+ {
2358
+ "epoch": 2.0012547051442913,
2359
+ "grad_norm": 2.2701094150543213,
2360
+ "learning_rate": 9.053188916868912e-06,
2361
+ "loss": 0.7191,
2362
+ "step": 3190
2363
+ },
2364
+ {
2365
+ "epoch": 2.0075282308657467,
2366
+ "grad_norm": 2.204155206680298,
2367
+ "learning_rate": 9.04740353787797e-06,
2368
+ "loss": 0.7206,
2369
+ "step": 3200
2370
+ },
2371
+ {
2372
+ "epoch": 2.0075282308657467,
2373
+ "eval_loss": 0.764485776424408,
2374
+ "eval_runtime": 44.8067,
2375
+ "eval_samples_per_second": 100.431,
2376
+ "eval_steps_per_second": 6.294,
2377
+ "step": 3200
2378
+ },
2379
+ {
2380
+ "epoch": 2.013801756587202,
2381
+ "grad_norm": 2.79905366897583,
2382
+ "learning_rate": 9.041602397661459e-06,
2383
+ "loss": 0.6708,
2384
+ "step": 3210
2385
+ },
2386
+ {
2387
+ "epoch": 2.0200752823086576,
2388
+ "grad_norm": 2.987636089324951,
2389
+ "learning_rate": 9.035785518809928e-06,
2390
+ "loss": 0.7049,
2391
+ "step": 3220
2392
+ },
2393
+ {
2394
+ "epoch": 2.026348808030113,
2395
+ "grad_norm": 2.120682716369629,
2396
+ "learning_rate": 9.029952923975217e-06,
2397
+ "loss": 0.65,
2398
+ "step": 3230
2399
+ },
2400
+ {
2401
+ "epoch": 2.0326223337515685,
2402
+ "grad_norm": 3.0108895301818848,
2403
+ "learning_rate": 9.024104635870368e-06,
2404
+ "loss": 0.6459,
2405
+ "step": 3240
2406
+ },
2407
+ {
2408
+ "epoch": 2.038895859473024,
2409
+ "grad_norm": 3.0043015480041504,
2410
+ "learning_rate": 9.018240677269532e-06,
2411
+ "loss": 0.7335,
2412
+ "step": 3250
2413
+ },
2414
+ {
2415
+ "epoch": 2.0451693851944794,
2416
+ "grad_norm": 3.1042096614837646,
2417
+ "learning_rate": 9.012361071007892e-06,
2418
+ "loss": 0.7028,
2419
+ "step": 3260
2420
+ },
2421
+ {
2422
+ "epoch": 2.051442910915935,
2423
+ "grad_norm": 2.9110677242279053,
2424
+ "learning_rate": 9.00646583998155e-06,
2425
+ "loss": 0.7218,
2426
+ "step": 3270
2427
+ },
2428
+ {
2429
+ "epoch": 2.0577164366373903,
2430
+ "grad_norm": 3.1871602535247803,
2431
+ "learning_rate": 9.000555007147469e-06,
2432
+ "loss": 0.688,
2433
+ "step": 3280
2434
+ },
2435
+ {
2436
+ "epoch": 2.063989962358846,
2437
+ "grad_norm": 3.1857354640960693,
2438
+ "learning_rate": 8.994628595523358e-06,
2439
+ "loss": 0.6847,
2440
+ "step": 3290
2441
+ },
2442
+ {
2443
+ "epoch": 2.0702634880803013,
2444
+ "grad_norm": 3.1492631435394287,
2445
+ "learning_rate": 8.988686628187597e-06,
2446
+ "loss": 0.71,
2447
+ "step": 3300
2448
+ },
2449
+ {
2450
+ "epoch": 2.0765370138017567,
2451
+ "grad_norm": 3.91326642036438,
2452
+ "learning_rate": 8.98272912827914e-06,
2453
+ "loss": 0.6827,
2454
+ "step": 3310
2455
+ },
2456
+ {
2457
+ "epoch": 2.082810539523212,
2458
+ "grad_norm": 3.0140974521636963,
2459
+ "learning_rate": 8.97675611899743e-06,
2460
+ "loss": 0.6514,
2461
+ "step": 3320
2462
+ },
2463
+ {
2464
+ "epoch": 2.0890840652446676,
2465
+ "grad_norm": 2.8540091514587402,
2466
+ "learning_rate": 8.970767623602299e-06,
2467
+ "loss": 0.6723,
2468
+ "step": 3330
2469
+ },
2470
+ {
2471
+ "epoch": 2.095357590966123,
2472
+ "grad_norm": 2.849728584289551,
2473
+ "learning_rate": 8.964763665413894e-06,
2474
+ "loss": 0.6874,
2475
+ "step": 3340
2476
+ },
2477
+ {
2478
+ "epoch": 2.1016311166875785,
2479
+ "grad_norm": 2.8010337352752686,
2480
+ "learning_rate": 8.95874426781257e-06,
2481
+ "loss": 0.6788,
2482
+ "step": 3350
2483
+ },
2484
+ {
2485
+ "epoch": 2.107904642409034,
2486
+ "grad_norm": 3.4718518257141113,
2487
+ "learning_rate": 8.952709454238809e-06,
2488
+ "loss": 0.7133,
2489
+ "step": 3360
2490
+ },
2491
+ {
2492
+ "epoch": 2.1141781681304894,
2493
+ "grad_norm": 2.874509811401367,
2494
+ "learning_rate": 8.946659248193122e-06,
2495
+ "loss": 0.7019,
2496
+ "step": 3370
2497
+ },
2498
+ {
2499
+ "epoch": 2.120451693851945,
2500
+ "grad_norm": 3.0436558723449707,
2501
+ "learning_rate": 8.940593673235962e-06,
2502
+ "loss": 0.675,
2503
+ "step": 3380
2504
+ },
2505
+ {
2506
+ "epoch": 2.1267252195734003,
2507
+ "grad_norm": 3.658440351486206,
2508
+ "learning_rate": 8.934512752987635e-06,
2509
+ "loss": 0.6886,
2510
+ "step": 3390
2511
+ },
2512
+ {
2513
+ "epoch": 2.132998745294856,
2514
+ "grad_norm": 3.0143980979919434,
2515
+ "learning_rate": 8.928416511128194e-06,
2516
+ "loss": 0.7139,
2517
+ "step": 3400
2518
+ },
2519
+ {
2520
+ "epoch": 2.132998745294856,
2521
+ "eval_loss": 0.7689785957336426,
2522
+ "eval_runtime": 44.9486,
2523
+ "eval_samples_per_second": 100.114,
2524
+ "eval_steps_per_second": 6.274,
2525
+ "step": 3400
2526
+ },
2527
+ {
2528
+ "epoch": 2.1392722710163112,
2529
+ "grad_norm": 3.6301114559173584,
2530
+ "learning_rate": 8.922304971397369e-06,
2531
+ "loss": 0.6935,
2532
+ "step": 3410
2533
+ },
2534
+ {
2535
+ "epoch": 2.1455457967377667,
2536
+ "grad_norm": 2.127795457839966,
2537
+ "learning_rate": 8.916178157594453e-06,
2538
+ "loss": 0.7591,
2539
+ "step": 3420
2540
+ },
2541
+ {
2542
+ "epoch": 2.151819322459222,
2543
+ "grad_norm": 2.917729377746582,
2544
+ "learning_rate": 8.910036093578223e-06,
2545
+ "loss": 0.7116,
2546
+ "step": 3430
2547
+ },
2548
+ {
2549
+ "epoch": 2.1580928481806776,
2550
+ "grad_norm": 3.085564374923706,
2551
+ "learning_rate": 8.90387880326684e-06,
2552
+ "loss": 0.706,
2553
+ "step": 3440
2554
+ },
2555
+ {
2556
+ "epoch": 2.164366373902133,
2557
+ "grad_norm": 2.7875218391418457,
2558
+ "learning_rate": 8.897706310637766e-06,
2559
+ "loss": 0.6973,
2560
+ "step": 3450
2561
+ },
2562
+ {
2563
+ "epoch": 2.1706398996235885,
2564
+ "grad_norm": 3.6002438068389893,
2565
+ "learning_rate": 8.89151863972765e-06,
2566
+ "loss": 0.685,
2567
+ "step": 3460
2568
+ },
2569
+ {
2570
+ "epoch": 2.176913425345044,
2571
+ "grad_norm": 4.168911933898926,
2572
+ "learning_rate": 8.88531581463226e-06,
2573
+ "loss": 0.6577,
2574
+ "step": 3470
2575
+ },
2576
+ {
2577
+ "epoch": 2.1831869510664994,
2578
+ "grad_norm": 3.066494941711426,
2579
+ "learning_rate": 8.879097859506371e-06,
2580
+ "loss": 0.6419,
2581
+ "step": 3480
2582
+ },
2583
+ {
2584
+ "epoch": 2.189460476787955,
2585
+ "grad_norm": 2.2242279052734375,
2586
+ "learning_rate": 8.872864798563676e-06,
2587
+ "loss": 0.6823,
2588
+ "step": 3490
2589
+ },
2590
+ {
2591
+ "epoch": 2.1957340025094103,
2592
+ "grad_norm": 3.157374620437622,
2593
+ "learning_rate": 8.866616656076696e-06,
2594
+ "loss": 0.667,
2595
+ "step": 3500
2596
+ },
2597
+ {
2598
+ "epoch": 2.2020075282308658,
2599
+ "grad_norm": 3.3725035190582275,
2600
+ "learning_rate": 8.860353456376679e-06,
2601
+ "loss": 0.689,
2602
+ "step": 3510
2603
+ },
2604
+ {
2605
+ "epoch": 2.208281053952321,
2606
+ "grad_norm": 2.948709487915039,
2607
+ "learning_rate": 8.854075223853509e-06,
2608
+ "loss": 0.6874,
2609
+ "step": 3520
2610
+ },
2611
+ {
2612
+ "epoch": 2.2145545796737767,
2613
+ "grad_norm": 2.990182399749756,
2614
+ "learning_rate": 8.847781982955613e-06,
2615
+ "loss": 0.6774,
2616
+ "step": 3530
2617
+ },
2618
+ {
2619
+ "epoch": 2.220828105395232,
2620
+ "grad_norm": 2.992229700088501,
2621
+ "learning_rate": 8.841473758189853e-06,
2622
+ "loss": 0.68,
2623
+ "step": 3540
2624
+ },
2625
+ {
2626
+ "epoch": 2.2271016311166876,
2627
+ "grad_norm": 2.7501344680786133,
2628
+ "learning_rate": 8.835150574121455e-06,
2629
+ "loss": 0.7222,
2630
+ "step": 3550
2631
+ },
2632
+ {
2633
+ "epoch": 2.233375156838143,
2634
+ "grad_norm": 2.766124725341797,
2635
+ "learning_rate": 8.828812455373891e-06,
2636
+ "loss": 0.6826,
2637
+ "step": 3560
2638
+ },
2639
+ {
2640
+ "epoch": 2.2396486825595985,
2641
+ "grad_norm": 2.608975648880005,
2642
+ "learning_rate": 8.82245942662879e-06,
2643
+ "loss": 0.6197,
2644
+ "step": 3570
2645
+ },
2646
+ {
2647
+ "epoch": 2.245922208281054,
2648
+ "grad_norm": 3.362671375274658,
2649
+ "learning_rate": 8.816091512625845e-06,
2650
+ "loss": 0.7241,
2651
+ "step": 3580
2652
+ },
2653
+ {
2654
+ "epoch": 2.2521957340025094,
2655
+ "grad_norm": 2.848883867263794,
2656
+ "learning_rate": 8.80970873816271e-06,
2657
+ "loss": 0.6613,
2658
+ "step": 3590
2659
+ },
2660
+ {
2661
+ "epoch": 2.258469259723965,
2662
+ "grad_norm": 3.4041519165039062,
2663
+ "learning_rate": 8.803311128094918e-06,
2664
+ "loss": 0.6934,
2665
+ "step": 3600
2666
+ },
2667
+ {
2668
+ "epoch": 2.258469259723965,
2669
+ "eval_loss": 0.7689230442047119,
2670
+ "eval_runtime": 44.9587,
2671
+ "eval_samples_per_second": 100.092,
2672
+ "eval_steps_per_second": 6.272,
2673
+ "step": 3600
2674
+ },
2675
+ {
2676
+ "epoch": 2.2647427854454203,
2677
+ "grad_norm": 3.4062633514404297,
2678
+ "learning_rate": 8.796898707335766e-06,
2679
+ "loss": 0.6948,
2680
+ "step": 3610
2681
+ },
2682
+ {
2683
+ "epoch": 2.2710163111668757,
2684
+ "grad_norm": 3.2286086082458496,
2685
+ "learning_rate": 8.790471500856229e-06,
2686
+ "loss": 0.7056,
2687
+ "step": 3620
2688
+ },
2689
+ {
2690
+ "epoch": 2.277289836888331,
2691
+ "grad_norm": 2.9476211071014404,
2692
+ "learning_rate": 8.784029533684857e-06,
2693
+ "loss": 0.7042,
2694
+ "step": 3630
2695
+ },
2696
+ {
2697
+ "epoch": 2.2835633626097867,
2698
+ "grad_norm": 3.454038381576538,
2699
+ "learning_rate": 8.777572830907685e-06,
2700
+ "loss": 0.6815,
2701
+ "step": 3640
2702
+ },
2703
+ {
2704
+ "epoch": 2.289836888331242,
2705
+ "grad_norm": 3.162994623184204,
2706
+ "learning_rate": 8.771101417668127e-06,
2707
+ "loss": 0.7092,
2708
+ "step": 3650
2709
+ },
2710
+ {
2711
+ "epoch": 2.2961104140526976,
2712
+ "grad_norm": 3.597926139831543,
2713
+ "learning_rate": 8.764615319166885e-06,
2714
+ "loss": 0.7045,
2715
+ "step": 3660
2716
+ },
2717
+ {
2718
+ "epoch": 2.302383939774153,
2719
+ "grad_norm": 3.0928547382354736,
2720
+ "learning_rate": 8.758114560661846e-06,
2721
+ "loss": 0.629,
2722
+ "step": 3670
2723
+ },
2724
+ {
2725
+ "epoch": 2.3086574654956085,
2726
+ "grad_norm": 3.255012273788452,
2727
+ "learning_rate": 8.751599167467985e-06,
2728
+ "loss": 0.6525,
2729
+ "step": 3680
2730
+ },
2731
+ {
2732
+ "epoch": 2.314930991217064,
2733
+ "grad_norm": 4.020911693572998,
2734
+ "learning_rate": 8.745069164957265e-06,
2735
+ "loss": 0.7017,
2736
+ "step": 3690
2737
+ },
2738
+ {
2739
+ "epoch": 2.3212045169385194,
2740
+ "grad_norm": 3.158914089202881,
2741
+ "learning_rate": 8.738524578558547e-06,
2742
+ "loss": 0.6835,
2743
+ "step": 3700
2744
+ },
2745
+ {
2746
+ "epoch": 2.327478042659975,
2747
+ "grad_norm": 3.5864481925964355,
2748
+ "learning_rate": 8.731965433757474e-06,
2749
+ "loss": 0.7069,
2750
+ "step": 3710
2751
+ },
2752
+ {
2753
+ "epoch": 2.3337515683814303,
2754
+ "grad_norm": 3.9232914447784424,
2755
+ "learning_rate": 8.72539175609639e-06,
2756
+ "loss": 0.7064,
2757
+ "step": 3720
2758
+ },
2759
+ {
2760
+ "epoch": 2.3400250941028857,
2761
+ "grad_norm": 2.820852756500244,
2762
+ "learning_rate": 8.718803571174229e-06,
2763
+ "loss": 0.7204,
2764
+ "step": 3730
2765
+ },
2766
+ {
2767
+ "epoch": 2.346298619824341,
2768
+ "grad_norm": 2.5474843978881836,
2769
+ "learning_rate": 8.712200904646417e-06,
2770
+ "loss": 0.6713,
2771
+ "step": 3740
2772
+ },
2773
+ {
2774
+ "epoch": 2.3525721455457966,
2775
+ "grad_norm": 3.648409843444824,
2776
+ "learning_rate": 8.705583782224776e-06,
2777
+ "loss": 0.6741,
2778
+ "step": 3750
2779
+ },
2780
+ {
2781
+ "epoch": 2.358845671267252,
2782
+ "grad_norm": 3.132319211959839,
2783
+ "learning_rate": 8.698952229677422e-06,
2784
+ "loss": 0.6306,
2785
+ "step": 3760
2786
+ },
2787
+ {
2788
+ "epoch": 2.3651191969887075,
2789
+ "grad_norm": 4.013784408569336,
2790
+ "learning_rate": 8.692306272828661e-06,
2791
+ "loss": 0.6905,
2792
+ "step": 3770
2793
+ },
2794
+ {
2795
+ "epoch": 2.371392722710163,
2796
+ "grad_norm": 3.268537998199463,
2797
+ "learning_rate": 8.685645937558896e-06,
2798
+ "loss": 0.7005,
2799
+ "step": 3780
2800
+ },
2801
+ {
2802
+ "epoch": 2.3776662484316184,
2803
+ "grad_norm": 3.449528217315674,
2804
+ "learning_rate": 8.678971249804517e-06,
2805
+ "loss": 0.635,
2806
+ "step": 3790
2807
+ },
2808
+ {
2809
+ "epoch": 2.383939774153074,
2810
+ "grad_norm": 4.023439884185791,
2811
+ "learning_rate": 8.67228223555781e-06,
2812
+ "loss": 0.6709,
2813
+ "step": 3800
2814
+ },
2815
+ {
2816
+ "epoch": 2.383939774153074,
2817
+ "eval_loss": 0.7715116739273071,
2818
+ "eval_runtime": 47.3333,
2819
+ "eval_samples_per_second": 95.07,
2820
+ "eval_steps_per_second": 5.958,
2821
+ "step": 3800
2822
+ },
2823
+ {
2824
+ "epoch": 2.3902132998745294,
2825
+ "grad_norm": 3.0970981121063232,
2826
+ "learning_rate": 8.665578920866844e-06,
2827
+ "loss": 0.7054,
2828
+ "step": 3810
2829
+ },
2830
+ {
2831
+ "epoch": 2.396486825595985,
2832
+ "grad_norm": 3.3581349849700928,
2833
+ "learning_rate": 8.658861331835384e-06,
2834
+ "loss": 0.7049,
2835
+ "step": 3820
2836
+ },
2837
+ {
2838
+ "epoch": 2.4027603513174403,
2839
+ "grad_norm": 3.5304226875305176,
2840
+ "learning_rate": 8.652129494622776e-06,
2841
+ "loss": 0.6614,
2842
+ "step": 3830
2843
+ },
2844
+ {
2845
+ "epoch": 2.4090338770388957,
2846
+ "grad_norm": 3.2534291744232178,
2847
+ "learning_rate": 8.645383435443853e-06,
2848
+ "loss": 0.6782,
2849
+ "step": 3840
2850
+ },
2851
+ {
2852
+ "epoch": 2.415307402760351,
2853
+ "grad_norm": 3.1076700687408447,
2854
+ "learning_rate": 8.638623180568829e-06,
2855
+ "loss": 0.6625,
2856
+ "step": 3850
2857
+ },
2858
+ {
2859
+ "epoch": 2.4215809284818066,
2860
+ "grad_norm": 3.4343130588531494,
2861
+ "learning_rate": 8.631848756323198e-06,
2862
+ "loss": 0.718,
2863
+ "step": 3860
2864
+ },
2865
+ {
2866
+ "epoch": 2.427854454203262,
2867
+ "grad_norm": 3.902872323989868,
2868
+ "learning_rate": 8.625060189087636e-06,
2869
+ "loss": 0.7383,
2870
+ "step": 3870
2871
+ },
2872
+ {
2873
+ "epoch": 2.4341279799247175,
2874
+ "grad_norm": 3.5992660522460938,
2875
+ "learning_rate": 8.618257505297887e-06,
2876
+ "loss": 0.6801,
2877
+ "step": 3880
2878
+ },
2879
+ {
2880
+ "epoch": 2.440401505646173,
2881
+ "grad_norm": 3.3129193782806396,
2882
+ "learning_rate": 8.611440731444673e-06,
2883
+ "loss": 0.7348,
2884
+ "step": 3890
2885
+ },
2886
+ {
2887
+ "epoch": 2.4466750313676284,
2888
+ "grad_norm": 3.9354326725006104,
2889
+ "learning_rate": 8.604609894073583e-06,
2890
+ "loss": 0.703,
2891
+ "step": 3900
2892
+ },
2893
+ {
2894
+ "epoch": 2.452948557089084,
2895
+ "grad_norm": 3.021115779876709,
2896
+ "learning_rate": 8.597765019784972e-06,
2897
+ "loss": 0.6625,
2898
+ "step": 3910
2899
+ },
2900
+ {
2901
+ "epoch": 2.4592220828105393,
2902
+ "grad_norm": 3.3624069690704346,
2903
+ "learning_rate": 8.590906135233854e-06,
2904
+ "loss": 0.6846,
2905
+ "step": 3920
2906
+ },
2907
+ {
2908
+ "epoch": 2.4654956085319952,
2909
+ "grad_norm": 3.6122751235961914,
2910
+ "learning_rate": 8.584033267129807e-06,
2911
+ "loss": 0.6983,
2912
+ "step": 3930
2913
+ },
2914
+ {
2915
+ "epoch": 2.4717691342534502,
2916
+ "grad_norm": 3.244967460632324,
2917
+ "learning_rate": 8.577146442236856e-06,
2918
+ "loss": 0.6882,
2919
+ "step": 3940
2920
+ },
2921
+ {
2922
+ "epoch": 2.478042659974906,
2923
+ "grad_norm": 4.329878807067871,
2924
+ "learning_rate": 8.570245687373384e-06,
2925
+ "loss": 0.6817,
2926
+ "step": 3950
2927
+ },
2928
+ {
2929
+ "epoch": 2.484316185696361,
2930
+ "grad_norm": 3.9007835388183594,
2931
+ "learning_rate": 8.563331029412013e-06,
2932
+ "loss": 0.7394,
2933
+ "step": 3960
2934
+ },
2935
+ {
2936
+ "epoch": 2.490589711417817,
2937
+ "grad_norm": 2.983142137527466,
2938
+ "learning_rate": 8.556402495279506e-06,
2939
+ "loss": 0.6368,
2940
+ "step": 3970
2941
+ },
2942
+ {
2943
+ "epoch": 2.496863237139272,
2944
+ "grad_norm": 3.44690203666687,
2945
+ "learning_rate": 8.549460111956665e-06,
2946
+ "loss": 0.7097,
2947
+ "step": 3980
2948
+ },
2949
+ {
2950
+ "epoch": 2.503136762860728,
2951
+ "grad_norm": 3.9488601684570312,
2952
+ "learning_rate": 8.542503906478224e-06,
2953
+ "loss": 0.6786,
2954
+ "step": 3990
2955
+ },
2956
+ {
2957
+ "epoch": 2.509410288582183,
2958
+ "grad_norm": 3.4564332962036133,
2959
+ "learning_rate": 8.535533905932739e-06,
2960
+ "loss": 0.6639,
2961
+ "step": 4000
2962
+ },
2963
+ {
2964
+ "epoch": 2.509410288582183,
2965
+ "eval_loss": 0.7692662477493286,
2966
+ "eval_runtime": 45.4007,
2967
+ "eval_samples_per_second": 99.118,
2968
+ "eval_steps_per_second": 6.211,
2969
+ "step": 4000
2970
+ },
2971
+ {
2972
+ "epoch": 2.509410288582183,
2973
+ "step": 4000,
2974
+ "total_flos": 5.277760478993449e+17,
2975
+ "train_loss": 0.7607251715660095,
2976
+ "train_runtime": 2991.8413,
2977
+ "train_samples_per_second": 85.232,
2978
+ "train_steps_per_second": 5.328
2979
+ }
2980
+ ],
2981
+ "logging_steps": 10,
2982
+ "max_steps": 15940,
2983
+ "num_input_tokens_seen": 0,
2984
+ "num_train_epochs": 10,
2985
+ "save_steps": 1000,
2986
+ "total_flos": 5.277760478993449e+17,
2987
+ "train_batch_size": 8,
2988
+ "trial_name": null,
2989
+ "trial_params": null
2990
+ }
llama3_8b_peft/news_commentary_it/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce4920d057d1ba234bc6f70d8f762c9ed3f415c74de0d124092a92d314a443a
3
+ size 5176
llama3_8b_peft/news_commentary_it/training_eval_loss.png ADDED
llama3_8b_peft/news_commentary_it/training_loss.png ADDED
llama3_8b_peft/topical_chat/README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: topical_chat_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # topical_chat_no_sys
18
+
19
+ This model is a fine-tuned version of Llama3-8b (loaded from the local path `/data1/model/llama3/unsloth/Llama3-8b`) on the topical_chat_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 2.1973
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 1e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 2
46
+ - total_train_batch_size: 16
47
+ - total_eval_batch_size: 16
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: cosine
50
+ - lr_scheduler_warmup_steps: 20
51
+ - num_epochs: 10.0
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss |
56
+ |:-------------:|:------:|:----:|:---------------:|
57
+ | 2.3684 | 0.2110 | 200 | 2.3924 |
58
+ | 2.4128 | 0.4219 | 400 | 2.3357 |
59
+ | 2.2848 | 0.6329 | 600 | 2.2946 |
60
+ | 2.3027 | 0.8439 | 800 | 2.2702 |
61
+ | 2.2643 | 1.0549 | 1000 | 2.2527 |
62
+ | 2.1938 | 1.2658 | 1200 | 2.2371 |
63
+ | 2.1872 | 1.4768 | 1400 | 2.2257 |
64
+ | 2.0937 | 1.6878 | 1600 | 2.2146 |
65
+ | 2.1031 | 1.8987 | 1800 | 2.2047 |
66
+ | 2.1546 | 2.1097 | 2000 | 2.2013 |
67
+ | 2.0292 | 2.3207 | 2200 | 2.1960 |
68
+ | 2.1621 | 2.5316 | 2400 | 2.1942 |
69
+ | 2.1634 | 2.7426 | 2600 | 2.1864 |
70
+ | 2.0756 | 2.9536 | 2800 | 2.1810 |
71
+ | 2.0085 | 3.1646 | 3000 | 2.1973 |
72
+ | 2.0527 | 3.3755 | 3200 | 2.1923 |
73
+ | 1.9061 | 3.5865 | 3400 | 2.1949 |
74
+ | 2.0197 | 3.7975 | 3600 | 2.1859 |
75
+ | 1.9671 | 4.0084 | 3800 | 2.1913 |
76
+ | 1.8848 | 4.2194 | 4000 | 2.2115 |
77
+ | 1.9566 | 4.4304 | 4200 | 2.2033 |
78
+ | 1.9092 | 4.6414 | 4400 | 2.2004 |
79
+ | 1.8674 | 4.8523 | 4600 | 2.2034 |
80
+
81
+
82
+ ### Framework versions
83
+
84
+ - PEFT 0.10.0
85
+ - Transformers 4.40.0
86
+ - Pytorch 2.2.1
87
+ - Datasets 2.18.0
88
+ - Tokenizers 0.19.1
llama3_8b_peft/topical_chat/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "k_proj",
24
+ "up_proj",
25
+ "v_proj",
26
+ "q_proj",
27
+ "gate_proj",
28
+ "down_proj",
29
+ "o_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/topical_chat/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e214a4c910563c5e697b4ab334238a553790dfbb7eaddacbb29612dd27f429
3
+ size 83945296
llama3_8b_peft/topical_chat/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.852320675105485,
3
+ "eval_loss": 2.19728946685791,
4
+ "eval_runtime": 73.4826,
5
+ "eval_samples_per_second": 36.417,
6
+ "eval_steps_per_second": 2.286,
7
+ "total_flos": 2.1607649914755482e+18,
8
+ "train_loss": 2.1172233917402186,
9
+ "train_runtime": 8185.4824,
10
+ "train_samples_per_second": 18.521,
11
+ "train_steps_per_second": 1.158
12
+ }
llama3_8b_peft/topical_chat/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.852320675105485,
3
+ "eval_loss": 2.19728946685791,
4
+ "eval_runtime": 73.4826,
5
+ "eval_samples_per_second": 36.417,
6
+ "eval_steps_per_second": 2.286
7
+ }
llama3_8b_peft/topical_chat/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/topical_chat/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/topical_chat/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }
llama3_8b_peft/topical_chat/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.852320675105485,
3
+ "total_flos": 2.1607649914755482e+18,
4
+ "train_loss": 2.1172233917402186,
5
+ "train_runtime": 8185.4824,
6
+ "train_samples_per_second": 18.521,
7
+ "train_steps_per_second": 1.158
8
+ }
llama3_8b_peft/topical_chat/trainer_log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/topical_chat/trainer_state.json ADDED
@@ -0,0 +1,3434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.19728946685791,
3
+ "best_model_checkpoint": "ckpt/llama3_8b_fuze27_no_sys/topical_chat_no_sys/checkpoint-3000",
4
+ "epoch": 4.852320675105485,
5
+ "eval_steps": 200,
6
+ "global_step": 4600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.010548523206751054,
13
+ "grad_norm": 1.7063179016113281,
14
+ "learning_rate": 5e-06,
15
+ "loss": 3.1503,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.02109704641350211,
20
+ "grad_norm": 1.986701488494873,
21
+ "learning_rate": 1e-05,
22
+ "loss": 3.0849,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.03164556962025317,
27
+ "grad_norm": 2.113104820251465,
28
+ "learning_rate": 9.999972428710264e-06,
29
+ "loss": 2.9717,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.04219409282700422,
34
+ "grad_norm": 1.971164584159851,
35
+ "learning_rate": 9.999889715145124e-06,
36
+ "loss": 2.774,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.052742616033755275,
41
+ "grad_norm": 1.938352108001709,
42
+ "learning_rate": 9.999751860216788e-06,
43
+ "loss": 2.4437,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.06329113924050633,
48
+ "grad_norm": 1.2463346719741821,
49
+ "learning_rate": 9.99955886544559e-06,
50
+ "loss": 2.513,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.07383966244725738,
55
+ "grad_norm": 1.5501407384872437,
56
+ "learning_rate": 9.99931073295998e-06,
57
+ "loss": 2.5023,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.08438818565400844,
62
+ "grad_norm": 1.2701120376586914,
63
+ "learning_rate": 9.999007465496488e-06,
64
+ "loss": 2.5034,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.0949367088607595,
69
+ "grad_norm": 1.3754881620407104,
70
+ "learning_rate": 9.998649066399704e-06,
71
+ "loss": 2.383,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.10548523206751055,
76
+ "grad_norm": 1.385846495628357,
77
+ "learning_rate": 9.998235539622239e-06,
78
+ "loss": 2.4444,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.1160337552742616,
83
+ "grad_norm": 1.620155930519104,
84
+ "learning_rate": 9.99776688972468e-06,
85
+ "loss": 2.4288,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.12658227848101267,
90
+ "grad_norm": 1.1363189220428467,
91
+ "learning_rate": 9.997243121875538e-06,
92
+ "loss": 2.4251,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.1371308016877637,
97
+ "grad_norm": 1.373314619064331,
98
+ "learning_rate": 9.996664241851197e-06,
99
+ "loss": 2.4312,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.14767932489451477,
104
+ "grad_norm": 1.5897873640060425,
105
+ "learning_rate": 9.996030256035845e-06,
106
+ "loss": 2.4217,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.15822784810126583,
111
+ "grad_norm": 1.6213446855545044,
112
+ "learning_rate": 9.995341171421402e-06,
113
+ "loss": 2.4883,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.16877637130801687,
118
+ "grad_norm": 1.5709846019744873,
119
+ "learning_rate": 9.99459699560745e-06,
120
+ "loss": 2.4673,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.17932489451476794,
125
+ "grad_norm": 1.5786807537078857,
126
+ "learning_rate": 9.993797736801146e-06,
127
+ "loss": 2.4421,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.189873417721519,
132
+ "grad_norm": 1.5626851320266724,
133
+ "learning_rate": 9.992943403817123e-06,
134
+ "loss": 2.3864,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.20042194092827004,
139
+ "grad_norm": 1.376423954963684,
140
+ "learning_rate": 9.992034006077412e-06,
141
+ "loss": 2.4255,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.2109704641350211,
146
+ "grad_norm": 1.5908286571502686,
147
+ "learning_rate": 9.991069553611317e-06,
148
+ "loss": 2.3684,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.2109704641350211,
153
+ "eval_loss": 2.3923792839050293,
154
+ "eval_runtime": 72.4436,
155
+ "eval_samples_per_second": 36.939,
156
+ "eval_steps_per_second": 2.319,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.22151898734177214,
161
+ "grad_norm": 1.4967713356018066,
162
+ "learning_rate": 9.990050057055319e-06,
163
+ "loss": 2.4513,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 0.2320675105485232,
168
+ "grad_norm": 1.4598913192749023,
169
+ "learning_rate": 9.98897552765295e-06,
170
+ "loss": 2.3781,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 0.24261603375527427,
175
+ "grad_norm": 1.8738089799880981,
176
+ "learning_rate": 9.987845977254674e-06,
177
+ "loss": 2.4242,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.25316455696202533,
182
+ "grad_norm": 1.7502378225326538,
183
+ "learning_rate": 9.986661418317759e-06,
184
+ "loss": 2.347,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 0.26371308016877637,
189
+ "grad_norm": 1.5759859085083008,
190
+ "learning_rate": 9.98542186390613e-06,
191
+ "loss": 2.3179,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 0.2742616033755274,
196
+ "grad_norm": 1.845045804977417,
197
+ "learning_rate": 9.984127327690232e-06,
198
+ "loss": 2.3928,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 0.2848101265822785,
203
+ "grad_norm": 1.7819545269012451,
204
+ "learning_rate": 9.98277782394688e-06,
205
+ "loss": 2.3468,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 0.29535864978902954,
210
+ "grad_norm": 1.9312375783920288,
211
+ "learning_rate": 9.981373367559095e-06,
212
+ "loss": 2.4083,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 0.3059071729957806,
217
+ "grad_norm": 1.6815539598464966,
218
+ "learning_rate": 9.97991397401595e-06,
219
+ "loss": 2.3588,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 0.31645569620253167,
224
+ "grad_norm": 2.412928819656372,
225
+ "learning_rate": 9.978399659412388e-06,
226
+ "loss": 2.4602,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.3270042194092827,
231
+ "grad_norm": 1.8568015098571777,
232
+ "learning_rate": 9.97683044044905e-06,
233
+ "loss": 2.3401,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 0.33755274261603374,
238
+ "grad_norm": 2.0904953479766846,
239
+ "learning_rate": 9.975206334432094e-06,
240
+ "loss": 2.3261,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 0.34810126582278483,
245
+ "grad_norm": 2.240638256072998,
246
+ "learning_rate": 9.973527359272998e-06,
247
+ "loss": 2.4249,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 0.35864978902953587,
252
+ "grad_norm": 1.6561917066574097,
253
+ "learning_rate": 9.97179353348837e-06,
254
+ "loss": 2.3165,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 0.3691983122362869,
259
+ "grad_norm": 1.7858705520629883,
260
+ "learning_rate": 9.970004876199731e-06,
261
+ "loss": 2.3943,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 0.379746835443038,
266
+ "grad_norm": 1.893686056137085,
267
+ "learning_rate": 9.968161407133317e-06,
268
+ "loss": 2.275,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 0.39029535864978904,
273
+ "grad_norm": 1.7844775915145874,
274
+ "learning_rate": 9.966263146619857e-06,
275
+ "loss": 2.382,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 0.4008438818565401,
280
+ "grad_norm": 1.966732144355774,
281
+ "learning_rate": 9.964310115594347e-06,
282
+ "loss": 2.2825,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 0.41139240506329117,
287
+ "grad_norm": 2.0981357097625732,
288
+ "learning_rate": 9.96230233559582e-06,
289
+ "loss": 2.394,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 0.4219409282700422,
294
+ "grad_norm": 2.20562744140625,
295
+ "learning_rate": 9.96023982876711e-06,
296
+ "loss": 2.4128,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 0.4219409282700422,
301
+ "eval_loss": 2.3357460498809814,
302
+ "eval_runtime": 72.4293,
303
+ "eval_samples_per_second": 36.946,
304
+ "eval_steps_per_second": 2.32,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 0.43248945147679324,
309
+ "grad_norm": 1.8895173072814941,
310
+ "learning_rate": 9.95812261785461e-06,
311
+ "loss": 2.3132,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 0.4430379746835443,
316
+ "grad_norm": 2.3103229999542236,
317
+ "learning_rate": 9.955950726208006e-06,
318
+ "loss": 2.3255,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 0.45358649789029537,
323
+ "grad_norm": 1.7691924571990967,
324
+ "learning_rate": 9.953724177780047e-06,
325
+ "loss": 2.3156,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 0.4641350210970464,
330
+ "grad_norm": 2.0431466102600098,
331
+ "learning_rate": 9.951442997126253e-06,
332
+ "loss": 2.2881,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 0.47468354430379744,
337
+ "grad_norm": 2.024620532989502,
338
+ "learning_rate": 9.949107209404664e-06,
339
+ "loss": 2.3883,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 0.48523206751054854,
344
+ "grad_norm": 2.369744062423706,
345
+ "learning_rate": 9.946716840375552e-06,
346
+ "loss": 2.3151,
347
+ "step": 460
348
+ },
349
+ {
350
+ "epoch": 0.4957805907172996,
351
+ "grad_norm": 2.1363399028778076,
352
+ "learning_rate": 9.944271916401139e-06,
353
+ "loss": 2.3275,
354
+ "step": 470
355
+ },
356
+ {
357
+ "epoch": 0.5063291139240507,
358
+ "grad_norm": 1.8980962038040161,
359
+ "learning_rate": 9.941772464445306e-06,
360
+ "loss": 2.3175,
361
+ "step": 480
362
+ },
363
+ {
364
+ "epoch": 0.5168776371308017,
365
+ "grad_norm": 2.313436985015869,
366
+ "learning_rate": 9.939218512073301e-06,
367
+ "loss": 2.4111,
368
+ "step": 490
369
+ },
370
+ {
371
+ "epoch": 0.5274261603375527,
372
+ "grad_norm": 2.2074170112609863,
373
+ "learning_rate": 9.936610087451428e-06,
374
+ "loss": 2.2846,
375
+ "step": 500
376
+ },
377
+ {
378
+ "epoch": 0.5379746835443038,
379
+ "grad_norm": 2.277611017227173,
380
+ "learning_rate": 9.93394721934674e-06,
381
+ "loss": 2.3471,
382
+ "step": 510
383
+ },
384
+ {
385
+ "epoch": 0.5485232067510548,
386
+ "grad_norm": 2.072939157485962,
387
+ "learning_rate": 9.931229937126719e-06,
388
+ "loss": 2.3272,
389
+ "step": 520
390
+ },
391
+ {
392
+ "epoch": 0.5590717299578059,
393
+ "grad_norm": 2.158026933670044,
394
+ "learning_rate": 9.928458270758955e-06,
395
+ "loss": 2.2773,
396
+ "step": 530
397
+ },
398
+ {
399
+ "epoch": 0.569620253164557,
400
+ "grad_norm": 2.133711099624634,
401
+ "learning_rate": 9.925632250810817e-06,
402
+ "loss": 2.3013,
403
+ "step": 540
404
+ },
405
+ {
406
+ "epoch": 0.580168776371308,
407
+ "grad_norm": 3.869311571121216,
408
+ "learning_rate": 9.92275190844911e-06,
409
+ "loss": 2.3261,
410
+ "step": 550
411
+ },
412
+ {
413
+ "epoch": 0.5907172995780591,
414
+ "grad_norm": 2.278590440750122,
415
+ "learning_rate": 9.91981727543973e-06,
416
+ "loss": 2.2665,
417
+ "step": 560
418
+ },
419
+ {
420
+ "epoch": 0.6012658227848101,
421
+ "grad_norm": 2.3577473163604736,
422
+ "learning_rate": 9.91682838414733e-06,
423
+ "loss": 2.2675,
424
+ "step": 570
425
+ },
426
+ {
427
+ "epoch": 0.6118143459915611,
428
+ "grad_norm": 1.974576711654663,
429
+ "learning_rate": 9.913785267534945e-06,
430
+ "loss": 2.2872,
431
+ "step": 580
432
+ },
433
+ {
434
+ "epoch": 0.6223628691983122,
435
+ "grad_norm": 2.5490148067474365,
436
+ "learning_rate": 9.910687959163634e-06,
437
+ "loss": 2.2921,
438
+ "step": 590
439
+ },
440
+ {
441
+ "epoch": 0.6329113924050633,
442
+ "grad_norm": 2.452335834503174,
443
+ "learning_rate": 9.90753649319211e-06,
444
+ "loss": 2.2848,
445
+ "step": 600
446
+ },
447
+ {
448
+ "epoch": 0.6329113924050633,
449
+ "eval_loss": 2.294635534286499,
450
+ "eval_runtime": 74.0336,
451
+ "eval_samples_per_second": 36.146,
452
+ "eval_steps_per_second": 2.269,
453
+ "step": 600
454
+ },
455
+ {
456
+ "epoch": 0.6434599156118144,
457
+ "grad_norm": 2.399472236633301,
458
+ "learning_rate": 9.904330904376368e-06,
459
+ "loss": 2.2704,
460
+ "step": 610
461
+ },
462
+ {
463
+ "epoch": 0.6540084388185654,
464
+ "grad_norm": 2.5536320209503174,
465
+ "learning_rate": 9.90107122806929e-06,
466
+ "loss": 2.3228,
467
+ "step": 620
468
+ },
469
+ {
470
+ "epoch": 0.6645569620253164,
471
+ "grad_norm": 2.2338976860046387,
472
+ "learning_rate": 9.897757500220275e-06,
473
+ "loss": 2.2389,
474
+ "step": 630
475
+ },
476
+ {
477
+ "epoch": 0.6751054852320675,
478
+ "grad_norm": 2.7006072998046875,
479
+ "learning_rate": 9.894389757374818e-06,
480
+ "loss": 2.3046,
481
+ "step": 640
482
+ },
483
+ {
484
+ "epoch": 0.6856540084388185,
485
+ "grad_norm": 2.3376288414001465,
486
+ "learning_rate": 9.89096803667413e-06,
487
+ "loss": 2.2457,
488
+ "step": 650
489
+ },
490
+ {
491
+ "epoch": 0.6962025316455697,
492
+ "grad_norm": 2.6427547931671143,
493
+ "learning_rate": 9.887492375854705e-06,
494
+ "loss": 2.2754,
495
+ "step": 660
496
+ },
497
+ {
498
+ "epoch": 0.7067510548523207,
499
+ "grad_norm": 2.253121852874756,
500
+ "learning_rate": 9.883962813247928e-06,
501
+ "loss": 2.4092,
502
+ "step": 670
503
+ },
504
+ {
505
+ "epoch": 0.7172995780590717,
506
+ "grad_norm": 2.317591428756714,
507
+ "learning_rate": 9.880379387779637e-06,
508
+ "loss": 2.3067,
509
+ "step": 680
510
+ },
511
+ {
512
+ "epoch": 0.7278481012658228,
513
+ "grad_norm": 2.6488993167877197,
514
+ "learning_rate": 9.876742138969695e-06,
515
+ "loss": 2.2451,
516
+ "step": 690
517
+ },
518
+ {
519
+ "epoch": 0.7383966244725738,
520
+ "grad_norm": 2.483212947845459,
521
+ "learning_rate": 9.873051106931557e-06,
522
+ "loss": 2.2985,
523
+ "step": 700
524
+ },
525
+ {
526
+ "epoch": 0.7489451476793249,
527
+ "grad_norm": 2.187453269958496,
528
+ "learning_rate": 9.86930633237183e-06,
529
+ "loss": 2.2701,
530
+ "step": 710
531
+ },
532
+ {
533
+ "epoch": 0.759493670886076,
534
+ "grad_norm": 2.5551323890686035,
535
+ "learning_rate": 9.865507856589822e-06,
536
+ "loss": 2.2569,
537
+ "step": 720
538
+ },
539
+ {
540
+ "epoch": 0.770042194092827,
541
+ "grad_norm": 2.497452735900879,
542
+ "learning_rate": 9.861655721477082e-06,
543
+ "loss": 2.2355,
544
+ "step": 730
545
+ },
546
+ {
547
+ "epoch": 0.7805907172995781,
548
+ "grad_norm": 2.444840669631958,
549
+ "learning_rate": 9.857749969516942e-06,
550
+ "loss": 2.2884,
551
+ "step": 740
552
+ },
553
+ {
554
+ "epoch": 0.7911392405063291,
555
+ "grad_norm": 2.9105498790740967,
556
+ "learning_rate": 9.853790643784047e-06,
557
+ "loss": 2.2208,
558
+ "step": 750
559
+ },
560
+ {
561
+ "epoch": 0.8016877637130801,
562
+ "grad_norm": 2.301388740539551,
563
+ "learning_rate": 9.84977778794389e-06,
564
+ "loss": 2.2916,
565
+ "step": 760
566
+ },
567
+ {
568
+ "epoch": 0.8122362869198312,
569
+ "grad_norm": 2.779360294342041,
570
+ "learning_rate": 9.845711446252313e-06,
571
+ "loss": 2.3143,
572
+ "step": 770
573
+ },
574
+ {
575
+ "epoch": 0.8227848101265823,
576
+ "grad_norm": 2.28932785987854,
577
+ "learning_rate": 9.84159166355503e-06,
578
+ "loss": 2.3225,
579
+ "step": 780
580
+ },
581
+ {
582
+ "epoch": 0.8333333333333334,
583
+ "grad_norm": 2.710916519165039,
584
+ "learning_rate": 9.837418485287126e-06,
585
+ "loss": 2.2709,
586
+ "step": 790
587
+ },
588
+ {
589
+ "epoch": 0.8438818565400844,
590
+ "grad_norm": 2.186706781387329,
591
+ "learning_rate": 9.83319195747257e-06,
592
+ "loss": 2.3027,
593
+ "step": 800
594
+ },
595
+ {
596
+ "epoch": 0.8438818565400844,
597
+ "eval_loss": 2.270158290863037,
598
+ "eval_runtime": 72.4255,
599
+ "eval_samples_per_second": 36.948,
600
+ "eval_steps_per_second": 2.32,
601
+ "step": 800
602
+ },
603
+ {
604
+ "epoch": 0.8544303797468354,
605
+ "grad_norm": 2.3358771800994873,
606
+ "learning_rate": 9.828912126723689e-06,
607
+ "loss": 2.3138,
608
+ "step": 810
609
+ },
610
+ {
611
+ "epoch": 0.8649789029535865,
612
+ "grad_norm": 2.4724321365356445,
613
+ "learning_rate": 9.824579040240663e-06,
614
+ "loss": 2.2824,
615
+ "step": 820
616
+ },
617
+ {
618
+ "epoch": 0.8755274261603375,
619
+ "grad_norm": 2.5058224201202393,
620
+ "learning_rate": 9.820192745811005e-06,
621
+ "loss": 2.2418,
622
+ "step": 830
623
+ },
624
+ {
625
+ "epoch": 0.8860759493670886,
626
+ "grad_norm": 2.5932464599609375,
627
+ "learning_rate": 9.815753291809035e-06,
628
+ "loss": 2.2283,
629
+ "step": 840
630
+ },
631
+ {
632
+ "epoch": 0.8966244725738397,
633
+ "grad_norm": 2.7198591232299805,
634
+ "learning_rate": 9.81126072719534e-06,
635
+ "loss": 2.2656,
636
+ "step": 850
637
+ },
638
+ {
639
+ "epoch": 0.9071729957805907,
640
+ "grad_norm": 2.4786908626556396,
641
+ "learning_rate": 9.806715101516243e-06,
642
+ "loss": 2.3116,
643
+ "step": 860
644
+ },
645
+ {
646
+ "epoch": 0.9177215189873418,
647
+ "grad_norm": 2.62888240814209,
648
+ "learning_rate": 9.802116464903246e-06,
649
+ "loss": 2.3526,
650
+ "step": 870
651
+ },
652
+ {
653
+ "epoch": 0.9282700421940928,
654
+ "grad_norm": 2.749924659729004,
655
+ "learning_rate": 9.797464868072489e-06,
656
+ "loss": 2.159,
657
+ "step": 880
658
+ },
659
+ {
660
+ "epoch": 0.9388185654008439,
661
+ "grad_norm": 2.126293182373047,
662
+ "learning_rate": 9.792760362324177e-06,
663
+ "loss": 2.2137,
664
+ "step": 890
665
+ },
666
+ {
667
+ "epoch": 0.9493670886075949,
668
+ "grad_norm": 3.1559131145477295,
669
+ "learning_rate": 9.78800299954203e-06,
670
+ "loss": 2.2044,
671
+ "step": 900
672
+ },
673
+ {
674
+ "epoch": 0.959915611814346,
675
+ "grad_norm": 2.785879611968994,
676
+ "learning_rate": 9.7831928321927e-06,
677
+ "loss": 2.2762,
678
+ "step": 910
679
+ },
680
+ {
681
+ "epoch": 0.9704641350210971,
682
+ "grad_norm": 2.7676944732666016,
683
+ "learning_rate": 9.77832991332519e-06,
684
+ "loss": 2.2568,
685
+ "step": 920
686
+ },
687
+ {
688
+ "epoch": 0.9810126582278481,
689
+ "grad_norm": 2.469571590423584,
690
+ "learning_rate": 9.773414296570282e-06,
691
+ "loss": 2.2713,
692
+ "step": 930
693
+ },
694
+ {
695
+ "epoch": 0.9915611814345991,
696
+ "grad_norm": 2.553380012512207,
697
+ "learning_rate": 9.768446036139932e-06,
698
+ "loss": 2.2773,
699
+ "step": 940
700
+ },
701
+ {
702
+ "epoch": 1.0021097046413503,
703
+ "grad_norm": 2.557131767272949,
704
+ "learning_rate": 9.763425186826678e-06,
705
+ "loss": 2.2571,
706
+ "step": 950
707
+ },
708
+ {
709
+ "epoch": 1.0126582278481013,
710
+ "grad_norm": 2.2884347438812256,
711
+ "learning_rate": 9.758351804003037e-06,
712
+ "loss": 2.203,
713
+ "step": 960
714
+ },
715
+ {
716
+ "epoch": 1.0232067510548524,
717
+ "grad_norm": 2.4316587448120117,
718
+ "learning_rate": 9.753225943620894e-06,
719
+ "loss": 2.2082,
720
+ "step": 970
721
+ },
722
+ {
723
+ "epoch": 1.0337552742616034,
724
+ "grad_norm": 2.7469358444213867,
725
+ "learning_rate": 9.74804766221088e-06,
726
+ "loss": 2.2026,
727
+ "step": 980
728
+ },
729
+ {
730
+ "epoch": 1.0443037974683544,
731
+ "grad_norm": 2.4454386234283447,
732
+ "learning_rate": 9.742817016881754e-06,
733
+ "loss": 2.1731,
734
+ "step": 990
735
+ },
736
+ {
737
+ "epoch": 1.0548523206751055,
738
+ "grad_norm": 2.6101303100585938,
739
+ "learning_rate": 9.737534065319772e-06,
740
+ "loss": 2.2643,
741
+ "step": 1000
742
+ },
743
+ {
744
+ "epoch": 1.0548523206751055,
745
+ "eval_loss": 2.252653121948242,
746
+ "eval_runtime": 72.3657,
747
+ "eval_samples_per_second": 36.979,
748
+ "eval_steps_per_second": 2.322,
749
+ "step": 1000
750
+ },
751
+ {
752
+ "epoch": 1.0654008438818565,
753
+ "grad_norm": 2.6304173469543457,
754
+ "learning_rate": 9.732198865788047e-06,
755
+ "loss": 2.0906,
756
+ "step": 1010
757
+ },
758
+ {
759
+ "epoch": 1.0759493670886076,
760
+ "grad_norm": 2.8806917667388916,
761
+ "learning_rate": 9.726811477125915e-06,
762
+ "loss": 2.1651,
763
+ "step": 1020
764
+ },
765
+ {
766
+ "epoch": 1.0864978902953586,
767
+ "grad_norm": 2.675865650177002,
768
+ "learning_rate": 9.721371958748276e-06,
769
+ "loss": 2.2193,
770
+ "step": 1030
771
+ },
772
+ {
773
+ "epoch": 1.0970464135021096,
774
+ "grad_norm": 2.775754928588867,
775
+ "learning_rate": 9.715880370644943e-06,
776
+ "loss": 2.2873,
777
+ "step": 1040
778
+ },
779
+ {
780
+ "epoch": 1.1075949367088607,
781
+ "grad_norm": 3.139587879180908,
782
+ "learning_rate": 9.710336773379984e-06,
783
+ "loss": 2.2342,
784
+ "step": 1050
785
+ },
786
+ {
787
+ "epoch": 1.1181434599156117,
788
+ "grad_norm": 2.364351511001587,
789
+ "learning_rate": 9.704741228091052e-06,
790
+ "loss": 2.2708,
791
+ "step": 1060
792
+ },
793
+ {
794
+ "epoch": 1.128691983122363,
795
+ "grad_norm": 2.773282766342163,
796
+ "learning_rate": 9.699093796488705e-06,
797
+ "loss": 2.2478,
798
+ "step": 1070
799
+ },
800
+ {
801
+ "epoch": 1.139240506329114,
802
+ "grad_norm": 2.5701181888580322,
803
+ "learning_rate": 9.693394540855732e-06,
804
+ "loss": 2.1281,
805
+ "step": 1080
806
+ },
807
+ {
808
+ "epoch": 1.149789029535865,
809
+ "grad_norm": 2.9195852279663086,
810
+ "learning_rate": 9.687643524046463e-06,
811
+ "loss": 2.1916,
812
+ "step": 1090
813
+ },
814
+ {
815
+ "epoch": 1.160337552742616,
816
+ "grad_norm": 2.9766101837158203,
817
+ "learning_rate": 9.681840809486083e-06,
818
+ "loss": 2.1888,
819
+ "step": 1100
820
+ },
821
+ {
822
+ "epoch": 1.1708860759493671,
823
+ "grad_norm": 2.440479278564453,
824
+ "learning_rate": 9.675986461169917e-06,
825
+ "loss": 2.3596,
826
+ "step": 1110
827
+ },
828
+ {
829
+ "epoch": 1.1814345991561181,
830
+ "grad_norm": 3.0085349082946777,
831
+ "learning_rate": 9.670080543662742e-06,
832
+ "loss": 2.1661,
833
+ "step": 1120
834
+ },
835
+ {
836
+ "epoch": 1.1919831223628692,
837
+ "grad_norm": 2.99704909324646,
838
+ "learning_rate": 9.664123122098059e-06,
839
+ "loss": 2.2364,
840
+ "step": 1130
841
+ },
842
+ {
843
+ "epoch": 1.2025316455696202,
844
+ "grad_norm": 2.998521327972412,
845
+ "learning_rate": 9.65811426217739e-06,
846
+ "loss": 2.1563,
847
+ "step": 1140
848
+ },
849
+ {
850
+ "epoch": 1.2130801687763713,
851
+ "grad_norm": 2.9255738258361816,
852
+ "learning_rate": 9.65205403016954e-06,
853
+ "loss": 2.2028,
854
+ "step": 1150
855
+ },
856
+ {
857
+ "epoch": 1.2236286919831223,
858
+ "grad_norm": 2.861806631088257,
859
+ "learning_rate": 9.645942492909875e-06,
860
+ "loss": 2.166,
861
+ "step": 1160
862
+ },
863
+ {
864
+ "epoch": 1.2341772151898733,
865
+ "grad_norm": 2.888535261154175,
866
+ "learning_rate": 9.639779717799582e-06,
867
+ "loss": 2.2316,
868
+ "step": 1170
869
+ },
870
+ {
871
+ "epoch": 1.2447257383966246,
872
+ "grad_norm": 3.1299514770507812,
873
+ "learning_rate": 9.63356577280492e-06,
874
+ "loss": 2.1605,
875
+ "step": 1180
876
+ },
877
+ {
878
+ "epoch": 1.2552742616033754,
879
+ "grad_norm": 3.181309461593628,
880
+ "learning_rate": 9.627300726456486e-06,
881
+ "loss": 2.1866,
882
+ "step": 1190
883
+ },
884
+ {
885
+ "epoch": 1.2658227848101267,
886
+ "grad_norm": 2.939859390258789,
887
+ "learning_rate": 9.62098464784844e-06,
888
+ "loss": 2.1938,
889
+ "step": 1200
890
+ },
891
+ {
892
+ "epoch": 1.2658227848101267,
893
+ "eval_loss": 2.2371487617492676,
894
+ "eval_runtime": 73.7924,
895
+ "eval_samples_per_second": 36.264,
896
+ "eval_steps_per_second": 2.277,
897
+ "step": 1200
898
+ },
899
+ {
900
+ "epoch": 1.2763713080168777,
901
+ "grad_norm": 3.16129207611084,
902
+ "learning_rate": 9.614617606637756e-06,
903
+ "loss": 2.2481,
904
+ "step": 1210
905
+ },
906
+ {
907
+ "epoch": 1.2869198312236287,
908
+ "grad_norm": 3.122466564178467,
909
+ "learning_rate": 9.608199673043447e-06,
910
+ "loss": 2.1828,
911
+ "step": 1220
912
+ },
913
+ {
914
+ "epoch": 1.2974683544303798,
915
+ "grad_norm": 2.9260802268981934,
916
+ "learning_rate": 9.601730917845798e-06,
917
+ "loss": 2.2265,
918
+ "step": 1230
919
+ },
920
+ {
921
+ "epoch": 1.3080168776371308,
922
+ "grad_norm": 3.076716423034668,
923
+ "learning_rate": 9.595211412385579e-06,
924
+ "loss": 2.2432,
925
+ "step": 1240
926
+ },
927
+ {
928
+ "epoch": 1.3185654008438819,
929
+ "grad_norm": 3.099963426589966,
930
+ "learning_rate": 9.588641228563257e-06,
931
+ "loss": 2.2253,
932
+ "step": 1250
933
+ },
934
+ {
935
+ "epoch": 1.3291139240506329,
936
+ "grad_norm": 3.0182437896728516,
937
+ "learning_rate": 9.582020438838213e-06,
938
+ "loss": 2.2205,
939
+ "step": 1260
940
+ },
941
+ {
942
+ "epoch": 1.339662447257384,
943
+ "grad_norm": 3.2867844104766846,
944
+ "learning_rate": 9.575349116227927e-06,
945
+ "loss": 2.1414,
946
+ "step": 1270
947
+ },
948
+ {
949
+ "epoch": 1.350210970464135,
950
+ "grad_norm": 3.032796621322632,
951
+ "learning_rate": 9.56862733430719e-06,
952
+ "loss": 2.2511,
953
+ "step": 1280
954
+ },
955
+ {
956
+ "epoch": 1.360759493670886,
957
+ "grad_norm": 2.8553504943847656,
958
+ "learning_rate": 9.561855167207276e-06,
959
+ "loss": 2.1994,
960
+ "step": 1290
961
+ },
962
+ {
963
+ "epoch": 1.371308016877637,
964
+ "grad_norm": 2.870274782180786,
965
+ "learning_rate": 9.555032689615145e-06,
966
+ "loss": 2.1685,
967
+ "step": 1300
968
+ },
969
+ {
970
+ "epoch": 1.3818565400843883,
971
+ "grad_norm": 2.965949296951294,
972
+ "learning_rate": 9.548159976772593e-06,
973
+ "loss": 2.1212,
974
+ "step": 1310
975
+ },
976
+ {
977
+ "epoch": 1.3924050632911391,
978
+ "grad_norm": 2.8772921562194824,
979
+ "learning_rate": 9.541237104475445e-06,
980
+ "loss": 2.2493,
981
+ "step": 1320
982
+ },
983
+ {
984
+ "epoch": 1.4029535864978904,
985
+ "grad_norm": 3.4942052364349365,
986
+ "learning_rate": 9.534264149072709e-06,
987
+ "loss": 2.1716,
988
+ "step": 1330
989
+ },
990
+ {
991
+ "epoch": 1.4135021097046414,
992
+ "grad_norm": 3.023036003112793,
993
+ "learning_rate": 9.527241187465735e-06,
994
+ "loss": 2.1314,
995
+ "step": 1340
996
+ },
997
+ {
998
+ "epoch": 1.4240506329113924,
999
+ "grad_norm": 3.17773699760437,
1000
+ "learning_rate": 9.520168297107364e-06,
1001
+ "loss": 2.2112,
1002
+ "step": 1350
1003
+ },
1004
+ {
1005
+ "epoch": 1.4345991561181435,
1006
+ "grad_norm": 3.7757740020751953,
1007
+ "learning_rate": 9.513045556001082e-06,
1008
+ "loss": 2.1952,
1009
+ "step": 1360
1010
+ },
1011
+ {
1012
+ "epoch": 1.4451476793248945,
1013
+ "grad_norm": 3.286278247833252,
1014
+ "learning_rate": 9.50587304270015e-06,
1015
+ "loss": 2.1897,
1016
+ "step": 1370
1017
+ },
1018
+ {
1019
+ "epoch": 1.4556962025316456,
1020
+ "grad_norm": 3.0467209815979004,
1021
+ "learning_rate": 9.498650836306748e-06,
1022
+ "loss": 2.2006,
1023
+ "step": 1380
1024
+ },
1025
+ {
1026
+ "epoch": 1.4662447257383966,
1027
+ "grad_norm": 2.9933762550354004,
1028
+ "learning_rate": 9.491379016471092e-06,
1029
+ "loss": 2.2045,
1030
+ "step": 1390
1031
+ },
1032
+ {
1033
+ "epoch": 1.4767932489451476,
1034
+ "grad_norm": 3.3392703533172607,
1035
+ "learning_rate": 9.484057663390565e-06,
1036
+ "loss": 2.1872,
1037
+ "step": 1400
1038
+ },
1039
+ {
1040
+ "epoch": 1.4767932489451476,
1041
+ "eval_loss": 2.2257068157196045,
1042
+ "eval_runtime": 72.3805,
1043
+ "eval_samples_per_second": 36.971,
1044
+ "eval_steps_per_second": 2.321,
1045
+ "step": 1400
1046
+ },
1047
+ {
1048
+ "epoch": 1.4873417721518987,
1049
+ "grad_norm": 3.325604200363159,
1050
+ "learning_rate": 9.476686857808823e-06,
1051
+ "loss": 2.171,
1052
+ "step": 1410
1053
+ },
1054
+ {
1055
+ "epoch": 1.49789029535865,
1056
+ "grad_norm": 3.2650458812713623,
1057
+ "learning_rate": 9.469266681014914e-06,
1058
+ "loss": 2.2007,
1059
+ "step": 1420
1060
+ },
1061
+ {
1062
+ "epoch": 1.5084388185654007,
1063
+ "grad_norm": 2.956291437149048,
1064
+ "learning_rate": 9.461797214842375e-06,
1065
+ "loss": 2.165,
1066
+ "step": 1430
1067
+ },
1068
+ {
1069
+ "epoch": 1.518987341772152,
1070
+ "grad_norm": 3.3491480350494385,
1071
+ "learning_rate": 9.454278541668334e-06,
1072
+ "loss": 2.1783,
1073
+ "step": 1440
1074
+ },
1075
+ {
1076
+ "epoch": 1.5295358649789028,
1077
+ "grad_norm": 3.5115392208099365,
1078
+ "learning_rate": 9.446710744412595e-06,
1079
+ "loss": 2.1754,
1080
+ "step": 1450
1081
+ },
1082
+ {
1083
+ "epoch": 1.540084388185654,
1084
+ "grad_norm": 2.963102102279663,
1085
+ "learning_rate": 9.439093906536732e-06,
1086
+ "loss": 2.0977,
1087
+ "step": 1460
1088
+ },
1089
+ {
1090
+ "epoch": 1.5506329113924051,
1091
+ "grad_norm": 3.5968339443206787,
1092
+ "learning_rate": 9.431428112043164e-06,
1093
+ "loss": 2.2207,
1094
+ "step": 1470
1095
+ },
1096
+ {
1097
+ "epoch": 1.5611814345991561,
1098
+ "grad_norm": 3.103698968887329,
1099
+ "learning_rate": 9.423713445474224e-06,
1100
+ "loss": 2.1644,
1101
+ "step": 1480
1102
+ },
1103
+ {
1104
+ "epoch": 1.5717299578059072,
1105
+ "grad_norm": 3.181288003921509,
1106
+ "learning_rate": 9.415949991911238e-06,
1107
+ "loss": 2.2277,
1108
+ "step": 1490
1109
+ },
1110
+ {
1111
+ "epoch": 1.5822784810126582,
1112
+ "grad_norm": 3.236287832260132,
1113
+ "learning_rate": 9.408137836973574e-06,
1114
+ "loss": 2.1953,
1115
+ "step": 1500
1116
+ },
1117
+ {
1118
+ "epoch": 1.5928270042194093,
1119
+ "grad_norm": 3.209091901779175,
1120
+ "learning_rate": 9.400277066817709e-06,
1121
+ "loss": 2.1765,
1122
+ "step": 1510
1123
+ },
1124
+ {
1125
+ "epoch": 1.6033755274261603,
1126
+ "grad_norm": 2.7873599529266357,
1127
+ "learning_rate": 9.392367768136272e-06,
1128
+ "loss": 2.1582,
1129
+ "step": 1520
1130
+ },
1131
+ {
1132
+ "epoch": 1.6139240506329116,
1133
+ "grad_norm": 3.7256994247436523,
1134
+ "learning_rate": 9.384410028157086e-06,
1135
+ "loss": 2.2087,
1136
+ "step": 1530
1137
+ },
1138
+ {
1139
+ "epoch": 1.6244725738396624,
1140
+ "grad_norm": 2.941373109817505,
1141
+ "learning_rate": 9.37640393464222e-06,
1142
+ "loss": 2.2451,
1143
+ "step": 1540
1144
+ },
1145
+ {
1146
+ "epoch": 1.6350210970464136,
1147
+ "grad_norm": 3.7483580112457275,
1148
+ "learning_rate": 9.368349575886995e-06,
1149
+ "loss": 2.2322,
1150
+ "step": 1550
1151
+ },
1152
+ {
1153
+ "epoch": 1.6455696202531644,
1154
+ "grad_norm": 4.179591655731201,
1155
+ "learning_rate": 9.36024704071904e-06,
1156
+ "loss": 2.1859,
1157
+ "step": 1560
1158
+ },
1159
+ {
1160
+ "epoch": 1.6561181434599157,
1161
+ "grad_norm": 3.725693702697754,
1162
+ "learning_rate": 9.352096418497289e-06,
1163
+ "loss": 2.1565,
1164
+ "step": 1570
1165
+ },
1166
+ {
1167
+ "epoch": 1.6666666666666665,
1168
+ "grad_norm": 3.3518712520599365,
1169
+ "learning_rate": 9.343897799111012e-06,
1170
+ "loss": 2.2024,
1171
+ "step": 1580
1172
+ },
1173
+ {
1174
+ "epoch": 1.6772151898734178,
1175
+ "grad_norm": 4.14971399307251,
1176
+ "learning_rate": 9.335651272978814e-06,
1177
+ "loss": 2.149,
1178
+ "step": 1590
1179
+ },
1180
+ {
1181
+ "epoch": 1.6877637130801688,
1182
+ "grad_norm": 3.21273136138916,
1183
+ "learning_rate": 9.327356931047636e-06,
1184
+ "loss": 2.0937,
1185
+ "step": 1600
1186
+ },
1187
+ {
1188
+ "epoch": 1.6877637130801688,
1189
+ "eval_loss": 2.2145838737487793,
1190
+ "eval_runtime": 72.3718,
1191
+ "eval_samples_per_second": 36.976,
1192
+ "eval_steps_per_second": 2.321,
1193
+ "step": 1600
1194
+ },
1195
+ {
1196
+ "epoch": 1.6983122362869199,
1197
+ "grad_norm": 3.091708183288574,
1198
+ "learning_rate": 9.319014864791762e-06,
1199
+ "loss": 2.1361,
1200
+ "step": 1610
1201
+ },
1202
+ {
1203
+ "epoch": 1.7088607594936709,
1204
+ "grad_norm": 3.416271686553955,
1205
+ "learning_rate": 9.310625166211802e-06,
1206
+ "loss": 2.1109,
1207
+ "step": 1620
1208
+ },
1209
+ {
1210
+ "epoch": 1.719409282700422,
1211
+ "grad_norm": 3.6835880279541016,
1212
+ "learning_rate": 9.30218792783368e-06,
1213
+ "loss": 2.1696,
1214
+ "step": 1630
1215
+ },
1216
+ {
1217
+ "epoch": 1.729957805907173,
1218
+ "grad_norm": 3.0759336948394775,
1219
+ "learning_rate": 9.293703242707613e-06,
1220
+ "loss": 2.1512,
1221
+ "step": 1640
1222
+ },
1223
+ {
1224
+ "epoch": 1.740506329113924,
1225
+ "grad_norm": 3.125152826309204,
1226
+ "learning_rate": 9.285171204407086e-06,
1227
+ "loss": 2.1959,
1228
+ "step": 1650
1229
+ },
1230
+ {
1231
+ "epoch": 1.7510548523206753,
1232
+ "grad_norm": 3.414592742919922,
1233
+ "learning_rate": 9.27659190702782e-06,
1234
+ "loss": 2.1791,
1235
+ "step": 1660
1236
+ },
1237
+ {
1238
+ "epoch": 1.761603375527426,
1239
+ "grad_norm": 4.3924641609191895,
1240
+ "learning_rate": 9.267965445186733e-06,
1241
+ "loss": 2.164,
1242
+ "step": 1670
1243
+ },
1244
+ {
1245
+ "epoch": 1.7721518987341773,
1246
+ "grad_norm": 3.432742118835449,
1247
+ "learning_rate": 9.259291914020893e-06,
1248
+ "loss": 2.1679,
1249
+ "step": 1680
1250
+ },
1251
+ {
1252
+ "epoch": 1.7827004219409281,
1253
+ "grad_norm": 3.354079484939575,
1254
+ "learning_rate": 9.250571409186482e-06,
1255
+ "loss": 2.1925,
1256
+ "step": 1690
1257
+ },
1258
+ {
1259
+ "epoch": 1.7932489451476794,
1260
+ "grad_norm": 3.1672236919403076,
1261
+ "learning_rate": 9.24180402685772e-06,
1262
+ "loss": 2.1545,
1263
+ "step": 1700
1264
+ },
1265
+ {
1266
+ "epoch": 1.8037974683544302,
1267
+ "grad_norm": 3.566680431365967,
1268
+ "learning_rate": 9.232989863725826e-06,
1269
+ "loss": 2.2048,
1270
+ "step": 1710
1271
+ },
1272
+ {
1273
+ "epoch": 1.8143459915611815,
1274
+ "grad_norm": 3.8227975368499756,
1275
+ "learning_rate": 9.224129016997938e-06,
1276
+ "loss": 2.1381,
1277
+ "step": 1720
1278
+ },
1279
+ {
1280
+ "epoch": 1.8248945147679325,
1281
+ "grad_norm": 3.222903251647949,
1282
+ "learning_rate": 9.215221584396044e-06,
1283
+ "loss": 2.2643,
1284
+ "step": 1730
1285
+ },
1286
+ {
1287
+ "epoch": 1.8354430379746836,
1288
+ "grad_norm": 3.442078113555908,
1289
+ "learning_rate": 9.206267664155906e-06,
1290
+ "loss": 2.1094,
1291
+ "step": 1740
1292
+ },
1293
+ {
1294
+ "epoch": 1.8459915611814346,
1295
+ "grad_norm": 3.3328030109405518,
1296
+ "learning_rate": 9.197267355025978e-06,
1297
+ "loss": 2.1904,
1298
+ "step": 1750
1299
+ },
1300
+ {
1301
+ "epoch": 1.8565400843881856,
1302
+ "grad_norm": 3.0517797470092773,
1303
+ "learning_rate": 9.188220756266309e-06,
1304
+ "loss": 2.1853,
1305
+ "step": 1760
1306
+ },
1307
+ {
1308
+ "epoch": 1.8670886075949367,
1309
+ "grad_norm": 3.536043643951416,
1310
+ "learning_rate": 9.17912796764746e-06,
1311
+ "loss": 2.2309,
1312
+ "step": 1770
1313
+ },
1314
+ {
1315
+ "epoch": 1.8776371308016877,
1316
+ "grad_norm": 4.112937927246094,
1317
+ "learning_rate": 9.16998908944939e-06,
1318
+ "loss": 2.153,
1319
+ "step": 1780
1320
+ },
1321
+ {
1322
+ "epoch": 1.888185654008439,
1323
+ "grad_norm": 3.280872106552124,
1324
+ "learning_rate": 9.160804222460369e-06,
1325
+ "loss": 2.128,
1326
+ "step": 1790
1327
+ },
1328
+ {
1329
+ "epoch": 1.8987341772151898,
1330
+ "grad_norm": 3.3007278442382812,
1331
+ "learning_rate": 9.151573467975846e-06,
1332
+ "loss": 2.1031,
1333
+ "step": 1800
1334
+ },
1335
+ {
1336
+ "epoch": 1.8987341772151898,
1337
+ "eval_loss": 2.2046918869018555,
1338
+ "eval_runtime": 73.9393,
1339
+ "eval_samples_per_second": 36.192,
1340
+ "eval_steps_per_second": 2.272,
1341
+ "step": 1800
1342
+ },
1343
+ {
1344
+ "epoch": 1.909282700421941,
1345
+ "grad_norm": 3.3225743770599365,
1346
+ "learning_rate": 9.14229692779734e-06,
1347
+ "loss": 2.158,
1348
+ "step": 1810
1349
+ },
1350
+ {
1351
+ "epoch": 1.9198312236286919,
1352
+ "grad_norm": 3.8517839908599854,
1353
+ "learning_rate": 9.132974704231328e-06,
1354
+ "loss": 2.241,
1355
+ "step": 1820
1356
+ },
1357
+ {
1358
+ "epoch": 1.9303797468354431,
1359
+ "grad_norm": 3.798785448074341,
1360
+ "learning_rate": 9.123606900088096e-06,
1361
+ "loss": 2.155,
1362
+ "step": 1830
1363
+ },
1364
+ {
1365
+ "epoch": 1.9409282700421941,
1366
+ "grad_norm": 3.634222984313965,
1367
+ "learning_rate": 9.114193618680623e-06,
1368
+ "loss": 2.2301,
1369
+ "step": 1840
1370
+ },
1371
+ {
1372
+ "epoch": 1.9514767932489452,
1373
+ "grad_norm": 3.9392707347869873,
1374
+ "learning_rate": 9.104734963823431e-06,
1375
+ "loss": 2.1955,
1376
+ "step": 1850
1377
+ },
1378
+ {
1379
+ "epoch": 1.9620253164556962,
1380
+ "grad_norm": 3.729219675064087,
1381
+ "learning_rate": 9.095231039831449e-06,
1382
+ "loss": 2.0308,
1383
+ "step": 1860
1384
+ },
1385
+ {
1386
+ "epoch": 1.9725738396624473,
1387
+ "grad_norm": 3.1710288524627686,
1388
+ "learning_rate": 9.08568195151885e-06,
1389
+ "loss": 2.2114,
1390
+ "step": 1870
1391
+ },
1392
+ {
1393
+ "epoch": 1.9831223628691983,
1394
+ "grad_norm": 3.749756097793579,
1395
+ "learning_rate": 9.076087804197907e-06,
1396
+ "loss": 2.1575,
1397
+ "step": 1880
1398
+ },
1399
+ {
1400
+ "epoch": 1.9936708860759493,
1401
+ "grad_norm": 3.3339526653289795,
1402
+ "learning_rate": 9.066448703677828e-06,
1403
+ "loss": 2.2289,
1404
+ "step": 1890
1405
+ },
1406
+ {
1407
+ "epoch": 2.0042194092827006,
1408
+ "grad_norm": 3.197739839553833,
1409
+ "learning_rate": 9.056764756263585e-06,
1410
+ "loss": 2.2093,
1411
+ "step": 1900
1412
+ },
1413
+ {
1414
+ "epoch": 2.0147679324894514,
1415
+ "grad_norm": 4.1469621658325195,
1416
+ "learning_rate": 9.047036068754745e-06,
1417
+ "loss": 2.0887,
1418
+ "step": 1910
1419
+ },
1420
+ {
1421
+ "epoch": 2.0253164556962027,
1422
+ "grad_norm": 3.831437587738037,
1423
+ "learning_rate": 9.037262748444296e-06,
1424
+ "loss": 2.0847,
1425
+ "step": 1920
1426
+ },
1427
+ {
1428
+ "epoch": 2.0358649789029535,
1429
+ "grad_norm": 3.6010704040527344,
1430
+ "learning_rate": 9.027444903117453e-06,
1431
+ "loss": 2.1559,
1432
+ "step": 1930
1433
+ },
1434
+ {
1435
+ "epoch": 2.0464135021097047,
1436
+ "grad_norm": 3.7649097442626953,
1437
+ "learning_rate": 9.017582641050481e-06,
1438
+ "loss": 2.0566,
1439
+ "step": 1940
1440
+ },
1441
+ {
1442
+ "epoch": 2.0569620253164556,
1443
+ "grad_norm": 3.78544545173645,
1444
+ "learning_rate": 9.007676071009492e-06,
1445
+ "loss": 2.0669,
1446
+ "step": 1950
1447
+ },
1448
+ {
1449
+ "epoch": 2.067510548523207,
1450
+ "grad_norm": 3.843498945236206,
1451
+ "learning_rate": 8.997725302249255e-06,
1452
+ "loss": 2.0457,
1453
+ "step": 1960
1454
+ },
1455
+ {
1456
+ "epoch": 2.0780590717299576,
1457
+ "grad_norm": 3.8778369426727295,
1458
+ "learning_rate": 8.98773044451198e-06,
1459
+ "loss": 2.0429,
1460
+ "step": 1970
1461
+ },
1462
+ {
1463
+ "epoch": 2.088607594936709,
1464
+ "grad_norm": 3.7618846893310547,
1465
+ "learning_rate": 8.977691608026112e-06,
1466
+ "loss": 2.1797,
1467
+ "step": 1980
1468
+ },
1469
+ {
1470
+ "epoch": 2.0991561181434597,
1471
+ "grad_norm": 3.520826816558838,
1472
+ "learning_rate": 8.967608903505119e-06,
1473
+ "loss": 2.1534,
1474
+ "step": 1990
1475
+ },
1476
+ {
1477
+ "epoch": 2.109704641350211,
1478
+ "grad_norm": 4.119839668273926,
1479
+ "learning_rate": 8.957482442146271e-06,
1480
+ "loss": 2.1546,
1481
+ "step": 2000
1482
+ },
1483
+ {
1484
+ "epoch": 2.109704641350211,
1485
+ "eval_loss": 2.2013118267059326,
1486
+ "eval_runtime": 86.6956,
1487
+ "eval_samples_per_second": 30.867,
1488
+ "eval_steps_per_second": 1.938,
1489
+ "step": 2000
1490
+ },
1491
+ {
1492
+ "epoch": 2.1202531645569622,
1493
+ "grad_norm": 3.69824481010437,
1494
+ "learning_rate": 8.947312335629407e-06,
1495
+ "loss": 2.0965,
1496
+ "step": 2010
1497
+ },
1498
+ {
1499
+ "epoch": 2.130801687763713,
1500
+ "grad_norm": 3.8636562824249268,
1501
+ "learning_rate": 8.937098696115707e-06,
1502
+ "loss": 2.0863,
1503
+ "step": 2020
1504
+ },
1505
+ {
1506
+ "epoch": 2.1413502109704643,
1507
+ "grad_norm": 3.984504222869873,
1508
+ "learning_rate": 8.926841636246457e-06,
1509
+ "loss": 2.0715,
1510
+ "step": 2030
1511
+ },
1512
+ {
1513
+ "epoch": 2.151898734177215,
1514
+ "grad_norm": 4.352499961853027,
1515
+ "learning_rate": 8.916541269141807e-06,
1516
+ "loss": 2.1533,
1517
+ "step": 2040
1518
+ },
1519
+ {
1520
+ "epoch": 2.1624472573839664,
1521
+ "grad_norm": 3.425851583480835,
1522
+ "learning_rate": 8.906197708399517e-06,
1523
+ "loss": 2.1041,
1524
+ "step": 2050
1525
+ },
1526
+ {
1527
+ "epoch": 2.172995780590717,
1528
+ "grad_norm": 4.200315952301025,
1529
+ "learning_rate": 8.895811068093711e-06,
1530
+ "loss": 2.0914,
1531
+ "step": 2060
1532
+ },
1533
+ {
1534
+ "epoch": 2.1835443037974684,
1535
+ "grad_norm": 5.192602157592773,
1536
+ "learning_rate": 8.885381462773618e-06,
1537
+ "loss": 2.1045,
1538
+ "step": 2070
1539
+ },
1540
+ {
1541
+ "epoch": 2.1940928270042193,
1542
+ "grad_norm": 4.468735694885254,
1543
+ "learning_rate": 8.874909007462306e-06,
1544
+ "loss": 2.0893,
1545
+ "step": 2080
1546
+ },
1547
+ {
1548
+ "epoch": 2.2046413502109705,
1549
+ "grad_norm": 4.539761066436768,
1550
+ "learning_rate": 8.864393817655414e-06,
1551
+ "loss": 2.0654,
1552
+ "step": 2090
1553
+ },
1554
+ {
1555
+ "epoch": 2.2151898734177213,
1556
+ "grad_norm": 4.390056610107422,
1557
+ "learning_rate": 8.85383600931988e-06,
1558
+ "loss": 2.0858,
1559
+ "step": 2100
1560
+ },
1561
+ {
1562
+ "epoch": 2.2257383966244726,
1563
+ "grad_norm": 4.019690990447998,
1564
+ "learning_rate": 8.843235698892661e-06,
1565
+ "loss": 2.1081,
1566
+ "step": 2110
1567
+ },
1568
+ {
1569
+ "epoch": 2.2362869198312234,
1570
+ "grad_norm": 4.380643367767334,
1571
+ "learning_rate": 8.83259300327945e-06,
1572
+ "loss": 2.0731,
1573
+ "step": 2120
1574
+ },
1575
+ {
1576
+ "epoch": 2.2468354430379747,
1577
+ "grad_norm": 3.870185375213623,
1578
+ "learning_rate": 8.821908039853384e-06,
1579
+ "loss": 2.1553,
1580
+ "step": 2130
1581
+ },
1582
+ {
1583
+ "epoch": 2.257383966244726,
1584
+ "grad_norm": 4.021355628967285,
1585
+ "learning_rate": 8.811180926453753e-06,
1586
+ "loss": 2.0808,
1587
+ "step": 2140
1588
+ },
1589
+ {
1590
+ "epoch": 2.2679324894514767,
1591
+ "grad_norm": 4.625735759735107,
1592
+ "learning_rate": 8.800411781384695e-06,
1593
+ "loss": 2.1289,
1594
+ "step": 2150
1595
+ },
1596
+ {
1597
+ "epoch": 2.278481012658228,
1598
+ "grad_norm": 4.156709671020508,
1599
+ "learning_rate": 8.7896007234139e-06,
1600
+ "loss": 2.0573,
1601
+ "step": 2160
1602
+ },
1603
+ {
1604
+ "epoch": 2.289029535864979,
1605
+ "grad_norm": 4.739879131317139,
1606
+ "learning_rate": 8.778747871771293e-06,
1607
+ "loss": 2.1166,
1608
+ "step": 2170
1609
+ },
1610
+ {
1611
+ "epoch": 2.29957805907173,
1612
+ "grad_norm": 4.324994087219238,
1613
+ "learning_rate": 8.767853346147718e-06,
1614
+ "loss": 2.1079,
1615
+ "step": 2180
1616
+ },
1617
+ {
1618
+ "epoch": 2.310126582278481,
1619
+ "grad_norm": 4.150203227996826,
1620
+ "learning_rate": 8.756917266693628e-06,
1621
+ "loss": 2.148,
1622
+ "step": 2190
1623
+ },
1624
+ {
1625
+ "epoch": 2.320675105485232,
1626
+ "grad_norm": 4.3915934562683105,
1627
+ "learning_rate": 8.745939754017744e-06,
1628
+ "loss": 2.0292,
1629
+ "step": 2200
1630
+ },
1631
+ {
1632
+ "epoch": 2.320675105485232,
1633
+ "eval_loss": 2.196009635925293,
1634
+ "eval_runtime": 72.9383,
1635
+ "eval_samples_per_second": 36.689,
1636
+ "eval_steps_per_second": 2.303,
1637
+ "step": 2200
1638
+ },
1639
+ {
1640
+ "epoch": 2.331223628691983,
1641
+ "grad_norm": 4.6052117347717285,
1642
+ "learning_rate": 8.734920929185744e-06,
1643
+ "loss": 2.0629,
1644
+ "step": 2210
1645
+ },
1646
+ {
1647
+ "epoch": 2.3417721518987342,
1648
+ "grad_norm": 4.201635837554932,
1649
+ "learning_rate": 8.72386091371891e-06,
1650
+ "loss": 2.1202,
1651
+ "step": 2220
1652
+ },
1653
+ {
1654
+ "epoch": 2.352320675105485,
1655
+ "grad_norm": 3.790405750274658,
1656
+ "learning_rate": 8.712759829592798e-06,
1657
+ "loss": 2.0787,
1658
+ "step": 2230
1659
+ },
1660
+ {
1661
+ "epoch": 2.3628691983122363,
1662
+ "grad_norm": 4.233027935028076,
1663
+ "learning_rate": 8.701617799235896e-06,
1664
+ "loss": 2.1352,
1665
+ "step": 2240
1666
+ },
1667
+ {
1668
+ "epoch": 2.3734177215189876,
1669
+ "grad_norm": 4.563284397125244,
1670
+ "learning_rate": 8.690434945528255e-06,
1671
+ "loss": 2.127,
1672
+ "step": 2250
1673
+ },
1674
+ {
1675
+ "epoch": 2.3839662447257384,
1676
+ "grad_norm": 3.9794418811798096,
1677
+ "learning_rate": 8.67921139180016e-06,
1678
+ "loss": 2.047,
1679
+ "step": 2260
1680
+ },
1681
+ {
1682
+ "epoch": 2.3945147679324896,
1683
+ "grad_norm": 4.363206386566162,
1684
+ "learning_rate": 8.66794726183075e-06,
1685
+ "loss": 2.0607,
1686
+ "step": 2270
1687
+ },
1688
+ {
1689
+ "epoch": 2.4050632911392404,
1690
+ "grad_norm": 3.976658821105957,
1691
+ "learning_rate": 8.656642679846661e-06,
1692
+ "loss": 2.0438,
1693
+ "step": 2280
1694
+ },
1695
+ {
1696
+ "epoch": 2.4156118143459917,
1697
+ "grad_norm": 4.318053722381592,
1698
+ "learning_rate": 8.645297770520656e-06,
1699
+ "loss": 2.1104,
1700
+ "step": 2290
1701
+ },
1702
+ {
1703
+ "epoch": 2.4261603375527425,
1704
+ "grad_norm": 3.9278979301452637,
1705
+ "learning_rate": 8.633912658970247e-06,
1706
+ "loss": 2.0841,
1707
+ "step": 2300
1708
+ },
1709
+ {
1710
+ "epoch": 2.4367088607594938,
1711
+ "grad_norm": 3.95326566696167,
1712
+ "learning_rate": 8.62248747075632e-06,
1713
+ "loss": 2.1109,
1714
+ "step": 2310
1715
+ },
1716
+ {
1717
+ "epoch": 2.4472573839662446,
1718
+ "grad_norm": 4.131571292877197,
1719
+ "learning_rate": 8.611022331881742e-06,
1720
+ "loss": 2.1062,
1721
+ "step": 2320
1722
+ },
1723
+ {
1724
+ "epoch": 2.457805907172996,
1725
+ "grad_norm": 4.508193492889404,
1726
+ "learning_rate": 8.599517368789981e-06,
1727
+ "loss": 2.0352,
1728
+ "step": 2330
1729
+ },
1730
+ {
1731
+ "epoch": 2.4683544303797467,
1732
+ "grad_norm": 4.229351997375488,
1733
+ "learning_rate": 8.587972708363703e-06,
1734
+ "loss": 2.0218,
1735
+ "step": 2340
1736
+ },
1737
+ {
1738
+ "epoch": 2.478902953586498,
1739
+ "grad_norm": 4.326112747192383,
1740
+ "learning_rate": 8.576388477923384e-06,
1741
+ "loss": 2.0751,
1742
+ "step": 2350
1743
+ },
1744
+ {
1745
+ "epoch": 2.489451476793249,
1746
+ "grad_norm": 3.9723427295684814,
1747
+ "learning_rate": 8.564764805225887e-06,
1748
+ "loss": 2.1476,
1749
+ "step": 2360
1750
+ },
1751
+ {
1752
+ "epoch": 2.5,
1753
+ "grad_norm": 4.076062202453613,
1754
+ "learning_rate": 8.553101818463077e-06,
1755
+ "loss": 2.025,
1756
+ "step": 2370
1757
+ },
1758
+ {
1759
+ "epoch": 2.510548523206751,
1760
+ "grad_norm": 4.194293022155762,
1761
+ "learning_rate": 8.541399646260384e-06,
1762
+ "loss": 2.059,
1763
+ "step": 2380
1764
+ },
1765
+ {
1766
+ "epoch": 2.521097046413502,
1767
+ "grad_norm": 4.295434474945068,
1768
+ "learning_rate": 8.529658417675403e-06,
1769
+ "loss": 2.1333,
1770
+ "step": 2390
1771
+ },
1772
+ {
1773
+ "epoch": 2.5316455696202533,
1774
+ "grad_norm": 6.034122943878174,
1775
+ "learning_rate": 8.517878262196462e-06,
1776
+ "loss": 2.1621,
1777
+ "step": 2400
1778
+ },
1779
+ {
1780
+ "epoch": 2.5316455696202533,
1781
+ "eval_loss": 2.194173574447632,
1782
+ "eval_runtime": 72.3651,
1783
+ "eval_samples_per_second": 36.979,
1784
+ "eval_steps_per_second": 2.322,
1785
+ "step": 2400
1786
+ },
1787
+ {
1788
+ "epoch": 2.542194092827004,
1789
+ "grad_norm": 4.601003646850586,
1790
+ "learning_rate": 8.506059309741188e-06,
1791
+ "loss": 2.123,
1792
+ "step": 2410
1793
+ },
1794
+ {
1795
+ "epoch": 2.5527426160337554,
1796
+ "grad_norm": 5.238481044769287,
1797
+ "learning_rate": 8.494201690655088e-06,
1798
+ "loss": 2.0147,
1799
+ "step": 2420
1800
+ },
1801
+ {
1802
+ "epoch": 2.5632911392405062,
1803
+ "grad_norm": 3.8078460693359375,
1804
+ "learning_rate": 8.482305535710105e-06,
1805
+ "loss": 2.041,
1806
+ "step": 2430
1807
+ },
1808
+ {
1809
+ "epoch": 2.5738396624472575,
1810
+ "grad_norm": 4.147160053253174,
1811
+ "learning_rate": 8.470370976103171e-06,
1812
+ "loss": 2.0651,
1813
+ "step": 2440
1814
+ },
1815
+ {
1816
+ "epoch": 2.5843881856540083,
1817
+ "grad_norm": 4.635336875915527,
1818
+ "learning_rate": 8.458398143454765e-06,
1819
+ "loss": 2.0387,
1820
+ "step": 2450
1821
+ },
1822
+ {
1823
+ "epoch": 2.5949367088607596,
1824
+ "grad_norm": 4.513761520385742,
1825
+ "learning_rate": 8.446387169807463e-06,
1826
+ "loss": 2.0266,
1827
+ "step": 2460
1828
+ },
1829
+ {
1830
+ "epoch": 2.605485232067511,
1831
+ "grad_norm": 4.775442123413086,
1832
+ "learning_rate": 8.43433818762448e-06,
1833
+ "loss": 2.0809,
1834
+ "step": 2470
1835
+ },
1836
+ {
1837
+ "epoch": 2.6160337552742616,
1838
+ "grad_norm": 5.595143795013428,
1839
+ "learning_rate": 8.422251329788207e-06,
1840
+ "loss": 2.1158,
1841
+ "step": 2480
1842
+ },
1843
+ {
1844
+ "epoch": 2.6265822784810124,
1845
+ "grad_norm": 6.713454246520996,
1846
+ "learning_rate": 8.410126729598746e-06,
1847
+ "loss": 2.0841,
1848
+ "step": 2490
1849
+ },
1850
+ {
1851
+ "epoch": 2.6371308016877637,
1852
+ "grad_norm": 4.723282814025879,
1853
+ "learning_rate": 8.397964520772446e-06,
1854
+ "loss": 2.0767,
1855
+ "step": 2500
1856
+ },
1857
+ {
1858
+ "epoch": 2.647679324894515,
1859
+ "grad_norm": 4.671931266784668,
1860
+ "learning_rate": 8.38576483744042e-06,
1861
+ "loss": 2.2249,
1862
+ "step": 2510
1863
+ },
1864
+ {
1865
+ "epoch": 2.6582278481012658,
1866
+ "grad_norm": 4.853333473205566,
1867
+ "learning_rate": 8.373527814147067e-06,
1868
+ "loss": 2.0802,
1869
+ "step": 2520
1870
+ },
1871
+ {
1872
+ "epoch": 2.668776371308017,
1873
+ "grad_norm": 4.616205215454102,
1874
+ "learning_rate": 8.361253585848592e-06,
1875
+ "loss": 2.0513,
1876
+ "step": 2530
1877
+ },
1878
+ {
1879
+ "epoch": 2.679324894514768,
1880
+ "grad_norm": 5.052831172943115,
1881
+ "learning_rate": 8.34894228791152e-06,
1882
+ "loss": 2.0663,
1883
+ "step": 2540
1884
+ },
1885
+ {
1886
+ "epoch": 2.689873417721519,
1887
+ "grad_norm": 4.211902618408203,
1888
+ "learning_rate": 8.336594056111197e-06,
1889
+ "loss": 2.1126,
1890
+ "step": 2550
1891
+ },
1892
+ {
1893
+ "epoch": 2.70042194092827,
1894
+ "grad_norm": 4.98234224319458,
1895
+ "learning_rate": 8.324209026630293e-06,
1896
+ "loss": 2.098,
1897
+ "step": 2560
1898
+ },
1899
+ {
1900
+ "epoch": 2.710970464135021,
1901
+ "grad_norm": 4.354977607727051,
1902
+ "learning_rate": 8.311787336057298e-06,
1903
+ "loss": 2.0381,
1904
+ "step": 2570
1905
+ },
1906
+ {
1907
+ "epoch": 2.721518987341772,
1908
+ "grad_norm": 4.826476573944092,
1909
+ "learning_rate": 8.299329121385027e-06,
1910
+ "loss": 2.0821,
1911
+ "step": 2580
1912
+ },
1913
+ {
1914
+ "epoch": 2.7320675105485233,
1915
+ "grad_norm": 5.218892574310303,
1916
+ "learning_rate": 8.286834520009101e-06,
1917
+ "loss": 2.0833,
1918
+ "step": 2590
1919
+ },
1920
+ {
1921
+ "epoch": 2.742616033755274,
1922
+ "grad_norm": 4.196876049041748,
1923
+ "learning_rate": 8.274303669726427e-06,
1924
+ "loss": 2.1634,
1925
+ "step": 2600
1926
+ },
1927
+ {
1928
+ "epoch": 2.742616033755274,
1929
+ "eval_loss": 2.186359167098999,
1930
+ "eval_runtime": 72.3827,
1931
+ "eval_samples_per_second": 36.97,
1932
+ "eval_steps_per_second": 2.321,
1933
+ "step": 2600
1934
+ },
1935
+ {
1936
+ "epoch": 2.7531645569620253,
1937
+ "grad_norm": 4.207483291625977,
1938
+ "learning_rate": 8.261736708733684e-06,
1939
+ "loss": 2.0848,
1940
+ "step": 2610
1941
+ },
1942
+ {
1943
+ "epoch": 2.7637130801687766,
1944
+ "grad_norm": 4.518768787384033,
1945
+ "learning_rate": 8.249133775625809e-06,
1946
+ "loss": 2.0817,
1947
+ "step": 2620
1948
+ },
1949
+ {
1950
+ "epoch": 2.7742616033755274,
1951
+ "grad_norm": 4.668468475341797,
1952
+ "learning_rate": 8.236495009394442e-06,
1953
+ "loss": 2.1302,
1954
+ "step": 2630
1955
+ },
1956
+ {
1957
+ "epoch": 2.7848101265822782,
1958
+ "grad_norm": 4.788352012634277,
1959
+ "learning_rate": 8.22382054942642e-06,
1960
+ "loss": 2.0826,
1961
+ "step": 2640
1962
+ },
1963
+ {
1964
+ "epoch": 2.7953586497890295,
1965
+ "grad_norm": 4.45465087890625,
1966
+ "learning_rate": 8.21111053550223e-06,
1967
+ "loss": 2.0879,
1968
+ "step": 2650
1969
+ },
1970
+ {
1971
+ "epoch": 2.8059071729957807,
1972
+ "grad_norm": 4.580440998077393,
1973
+ "learning_rate": 8.198365107794457e-06,
1974
+ "loss": 2.1272,
1975
+ "step": 2660
1976
+ },
1977
+ {
1978
+ "epoch": 2.8164556962025316,
1979
+ "grad_norm": 5.202081203460693,
1980
+ "learning_rate": 8.185584406866257e-06,
1981
+ "loss": 2.1168,
1982
+ "step": 2670
1983
+ },
1984
+ {
1985
+ "epoch": 2.827004219409283,
1986
+ "grad_norm": 4.497466564178467,
1987
+ "learning_rate": 8.172768573669789e-06,
1988
+ "loss": 2.0066,
1989
+ "step": 2680
1990
+ },
1991
+ {
1992
+ "epoch": 2.8375527426160336,
1993
+ "grad_norm": 5.110291481018066,
1994
+ "learning_rate": 8.159917749544679e-06,
1995
+ "loss": 2.0987,
1996
+ "step": 2690
1997
+ },
1998
+ {
1999
+ "epoch": 2.848101265822785,
2000
+ "grad_norm": 4.87176513671875,
2001
+ "learning_rate": 8.147032076216439e-06,
2002
+ "loss": 2.0955,
2003
+ "step": 2700
2004
+ },
2005
+ {
2006
+ "epoch": 2.8586497890295357,
2007
+ "grad_norm": 5.162720203399658,
2008
+ "learning_rate": 8.134111695794926e-06,
2009
+ "loss": 2.0514,
2010
+ "step": 2710
2011
+ },
2012
+ {
2013
+ "epoch": 2.869198312236287,
2014
+ "grad_norm": 5.412773609161377,
2015
+ "learning_rate": 8.121156750772761e-06,
2016
+ "loss": 2.0482,
2017
+ "step": 2720
2018
+ },
2019
+ {
2020
+ "epoch": 2.879746835443038,
2021
+ "grad_norm": 4.903331756591797,
2022
+ "learning_rate": 8.10816738402376e-06,
2023
+ "loss": 2.1179,
2024
+ "step": 2730
2025
+ },
2026
+ {
2027
+ "epoch": 2.890295358649789,
2028
+ "grad_norm": 4.829965591430664,
2029
+ "learning_rate": 8.095143738801358e-06,
2030
+ "loss": 2.0328,
2031
+ "step": 2740
2032
+ },
2033
+ {
2034
+ "epoch": 2.90084388185654,
2035
+ "grad_norm": 4.6957197189331055,
2036
+ "learning_rate": 8.082085958737039e-06,
2037
+ "loss": 2.0518,
2038
+ "step": 2750
2039
+ },
2040
+ {
2041
+ "epoch": 2.911392405063291,
2042
+ "grad_norm": 5.155613899230957,
2043
+ "learning_rate": 8.068994187838733e-06,
2044
+ "loss": 2.0637,
2045
+ "step": 2760
2046
+ },
2047
+ {
2048
+ "epoch": 2.9219409282700424,
2049
+ "grad_norm": 4.745687007904053,
2050
+ "learning_rate": 8.055868570489247e-06,
2051
+ "loss": 2.0828,
2052
+ "step": 2770
2053
+ },
2054
+ {
2055
+ "epoch": 2.932489451476793,
2056
+ "grad_norm": 4.50001335144043,
2057
+ "learning_rate": 8.042709251444657e-06,
2058
+ "loss": 2.0771,
2059
+ "step": 2780
2060
+ },
2061
+ {
2062
+ "epoch": 2.9430379746835444,
2063
+ "grad_norm": 4.297122001647949,
2064
+ "learning_rate": 8.029516375832727e-06,
2065
+ "loss": 2.0656,
2066
+ "step": 2790
2067
+ },
2068
+ {
2069
+ "epoch": 2.9535864978902953,
2070
+ "grad_norm": 4.995817184448242,
2071
+ "learning_rate": 8.016290089151293e-06,
2072
+ "loss": 2.0756,
2073
+ "step": 2800
2074
+ },
2075
+ {
2076
+ "epoch": 2.9535864978902953,
2077
+ "eval_loss": 2.181023120880127,
2078
+ "eval_runtime": 73.7935,
2079
+ "eval_samples_per_second": 36.263,
2080
+ "eval_steps_per_second": 2.277,
2081
+ "step": 2800
2082
+ },
2083
+ {
2084
+ "epoch": 2.9641350210970465,
2085
+ "grad_norm": 4.539944171905518,
2086
+ "learning_rate": 8.003030537266664e-06,
2087
+ "loss": 2.1362,
2088
+ "step": 2810
2089
+ },
2090
+ {
2091
+ "epoch": 2.9746835443037973,
2092
+ "grad_norm": 4.709123134613037,
2093
+ "learning_rate": 7.989737866412025e-06,
2094
+ "loss": 2.0419,
2095
+ "step": 2820
2096
+ },
2097
+ {
2098
+ "epoch": 2.9852320675105486,
2099
+ "grad_norm": 5.678777694702148,
2100
+ "learning_rate": 7.976412223185804e-06,
2101
+ "loss": 2.117,
2102
+ "step": 2830
2103
+ },
2104
+ {
2105
+ "epoch": 2.9957805907173,
2106
+ "grad_norm": 3.9071388244628906,
2107
+ "learning_rate": 7.96305375455007e-06,
2108
+ "loss": 2.0569,
2109
+ "step": 2840
2110
+ },
2111
+ {
2112
+ "epoch": 3.0063291139240507,
2113
+ "grad_norm": 4.4644670486450195,
2114
+ "learning_rate": 7.949662607828905e-06,
2115
+ "loss": 2.0296,
2116
+ "step": 2850
2117
+ },
2118
+ {
2119
+ "epoch": 3.0168776371308015,
2120
+ "grad_norm": 4.650942802429199,
2121
+ "learning_rate": 7.93623893070679e-06,
2122
+ "loss": 2.0201,
2123
+ "step": 2860
2124
+ },
2125
+ {
2126
+ "epoch": 3.0274261603375527,
2127
+ "grad_norm": 5.012904167175293,
2128
+ "learning_rate": 7.922782871226953e-06,
2129
+ "loss": 2.0149,
2130
+ "step": 2870
2131
+ },
2132
+ {
2133
+ "epoch": 3.037974683544304,
2134
+ "grad_norm": 7.029259204864502,
2135
+ "learning_rate": 7.909294577789765e-06,
2136
+ "loss": 1.9386,
2137
+ "step": 2880
2138
+ },
2139
+ {
2140
+ "epoch": 3.048523206751055,
2141
+ "grad_norm": 5.173169136047363,
2142
+ "learning_rate": 7.895774199151084e-06,
2143
+ "loss": 2.0409,
2144
+ "step": 2890
2145
+ },
2146
+ {
2147
+ "epoch": 3.059071729957806,
2148
+ "grad_norm": 5.033509254455566,
2149
+ "learning_rate": 7.882221884420617e-06,
2150
+ "loss": 2.0249,
2151
+ "step": 2900
2152
+ },
2153
+ {
2154
+ "epoch": 3.069620253164557,
2155
+ "grad_norm": 5.363480091094971,
2156
+ "learning_rate": 7.868637783060288e-06,
2157
+ "loss": 2.0824,
2158
+ "step": 2910
2159
+ },
2160
+ {
2161
+ "epoch": 3.080168776371308,
2162
+ "grad_norm": 5.523759841918945,
2163
+ "learning_rate": 7.855022044882572e-06,
2164
+ "loss": 1.9965,
2165
+ "step": 2920
2166
+ },
2167
+ {
2168
+ "epoch": 3.090717299578059,
2169
+ "grad_norm": 5.557906150817871,
2170
+ "learning_rate": 7.841374820048854e-06,
2171
+ "loss": 1.9177,
2172
+ "step": 2930
2173
+ },
2174
+ {
2175
+ "epoch": 3.1012658227848102,
2176
+ "grad_norm": 5.998488426208496,
2177
+ "learning_rate": 7.82769625906777e-06,
2178
+ "loss": 2.0116,
2179
+ "step": 2940
2180
+ },
2181
+ {
2182
+ "epoch": 3.111814345991561,
2183
+ "grad_norm": 4.81965446472168,
2184
+ "learning_rate": 7.813986512793546e-06,
2185
+ "loss": 2.0253,
2186
+ "step": 2950
2187
+ },
2188
+ {
2189
+ "epoch": 3.1223628691983123,
2190
+ "grad_norm": 6.071234703063965,
2191
+ "learning_rate": 7.80024573242434e-06,
2192
+ "loss": 2.0284,
2193
+ "step": 2960
2194
+ },
2195
+ {
2196
+ "epoch": 3.132911392405063,
2197
+ "grad_norm": 6.464727878570557,
2198
+ "learning_rate": 7.786474069500564e-06,
2199
+ "loss": 1.9866,
2200
+ "step": 2970
2201
+ },
2202
+ {
2203
+ "epoch": 3.1434599156118144,
2204
+ "grad_norm": 5.230690002441406,
2205
+ "learning_rate": 7.772671675903222e-06,
2206
+ "loss": 1.9765,
2207
+ "step": 2980
2208
+ },
2209
+ {
2210
+ "epoch": 3.1540084388185656,
2211
+ "grad_norm": 6.096700668334961,
2212
+ "learning_rate": 7.75883870385223e-06,
2213
+ "loss": 2.0725,
2214
+ "step": 2990
2215
+ },
2216
+ {
2217
+ "epoch": 3.1645569620253164,
2218
+ "grad_norm": 5.314362525939941,
2219
+ "learning_rate": 7.744975305904742e-06,
2220
+ "loss": 2.0085,
2221
+ "step": 3000
2222
+ },
2223
+ {
2224
+ "epoch": 3.1645569620253164,
2225
+ "eval_loss": 2.19728946685791,
2226
+ "eval_runtime": 72.3767,
2227
+ "eval_samples_per_second": 36.973,
2228
+ "eval_steps_per_second": 2.321,
2229
+ "step": 3000
2230
+ },
2231
+ {
2232
+ "epoch": 3.1751054852320677,
2233
+ "grad_norm": 5.176684379577637,
2234
+ "learning_rate": 7.731081634953464e-06,
2235
+ "loss": 1.9285,
2236
+ "step": 3010
2237
+ },
2238
+ {
2239
+ "epoch": 3.1856540084388185,
2240
+ "grad_norm": 5.523383140563965,
2241
+ "learning_rate": 7.717157844224962e-06,
2242
+ "loss": 2.0336,
2243
+ "step": 3020
2244
+ },
2245
+ {
2246
+ "epoch": 3.1962025316455698,
2247
+ "grad_norm": 4.590834617614746,
2248
+ "learning_rate": 7.703204087277989e-06,
2249
+ "loss": 1.9743,
2250
+ "step": 3030
2251
+ },
2252
+ {
2253
+ "epoch": 3.2067510548523206,
2254
+ "grad_norm": 5.178727149963379,
2255
+ "learning_rate": 7.689220518001771e-06,
2256
+ "loss": 1.942,
2257
+ "step": 3040
2258
+ },
2259
+ {
2260
+ "epoch": 3.217299578059072,
2261
+ "grad_norm": 5.583121299743652,
2262
+ "learning_rate": 7.675207290614326e-06,
2263
+ "loss": 1.9774,
2264
+ "step": 3050
2265
+ },
2266
+ {
2267
+ "epoch": 3.2278481012658227,
2268
+ "grad_norm": 5.265313625335693,
2269
+ "learning_rate": 7.661164559660754e-06,
2270
+ "loss": 1.9833,
2271
+ "step": 3060
2272
+ },
2273
+ {
2274
+ "epoch": 3.238396624472574,
2275
+ "grad_norm": 5.874401092529297,
2276
+ "learning_rate": 7.64709248001154e-06,
2277
+ "loss": 1.964,
2278
+ "step": 3070
2279
+ },
2280
+ {
2281
+ "epoch": 3.2489451476793247,
2282
+ "grad_norm": 5.494770050048828,
2283
+ "learning_rate": 7.632991206860831e-06,
2284
+ "loss": 1.9016,
2285
+ "step": 3080
2286
+ },
2287
+ {
2288
+ "epoch": 3.259493670886076,
2289
+ "grad_norm": 5.163134574890137,
2290
+ "learning_rate": 7.618860895724749e-06,
2291
+ "loss": 2.0045,
2292
+ "step": 3090
2293
+ },
2294
+ {
2295
+ "epoch": 3.270042194092827,
2296
+ "grad_norm": 5.281393051147461,
2297
+ "learning_rate": 7.604701702439652e-06,
2298
+ "loss": 1.9715,
2299
+ "step": 3100
2300
+ },
2301
+ {
2302
+ "epoch": 3.280590717299578,
2303
+ "grad_norm": 5.9331583976745605,
2304
+ "learning_rate": 7.590513783160429e-06,
2305
+ "loss": 2.0094,
2306
+ "step": 3110
2307
+ },
2308
+ {
2309
+ "epoch": 3.291139240506329,
2310
+ "grad_norm": 6.16027307510376,
2311
+ "learning_rate": 7.576297294358772e-06,
2312
+ "loss": 1.9725,
2313
+ "step": 3120
2314
+ },
2315
+ {
2316
+ "epoch": 3.30168776371308,
2317
+ "grad_norm": 5.6299519538879395,
2318
+ "learning_rate": 7.562052392821454e-06,
2319
+ "loss": 1.9742,
2320
+ "step": 3130
2321
+ },
2322
+ {
2323
+ "epoch": 3.3122362869198314,
2324
+ "grad_norm": 6.122225761413574,
2325
+ "learning_rate": 7.547779235648598e-06,
2326
+ "loss": 1.9856,
2327
+ "step": 3140
2328
+ },
2329
+ {
2330
+ "epoch": 3.3227848101265822,
2331
+ "grad_norm": 5.394702911376953,
2332
+ "learning_rate": 7.533477980251947e-06,
2333
+ "loss": 1.9929,
2334
+ "step": 3150
2335
+ },
2336
+ {
2337
+ "epoch": 3.3333333333333335,
2338
+ "grad_norm": 5.1701836585998535,
2339
+ "learning_rate": 7.519148784353122e-06,
2340
+ "loss": 2.0319,
2341
+ "step": 3160
2342
+ },
2343
+ {
2344
+ "epoch": 3.3438818565400843,
2345
+ "grad_norm": 6.699225425720215,
2346
+ "learning_rate": 7.504791805981886e-06,
2347
+ "loss": 2.0251,
2348
+ "step": 3170
2349
+ },
2350
+ {
2351
+ "epoch": 3.3544303797468356,
2352
+ "grad_norm": 6.399612903594971,
2353
+ "learning_rate": 7.490407203474403e-06,
2354
+ "loss": 2.0114,
2355
+ "step": 3180
2356
+ },
2357
+ {
2358
+ "epoch": 3.3649789029535864,
2359
+ "grad_norm": 5.491964817047119,
2360
+ "learning_rate": 7.475995135471496e-06,
2361
+ "loss": 2.043,
2362
+ "step": 3190
2363
+ },
2364
+ {
2365
+ "epoch": 3.3755274261603376,
2366
+ "grad_norm": 5.911776542663574,
2367
+ "learning_rate": 7.46155576091688e-06,
2368
+ "loss": 2.0527,
2369
+ "step": 3200
2370
+ },
2371
+ {
2372
+ "epoch": 3.3755274261603376,
2373
+ "eval_loss": 2.1922879219055176,
2374
+ "eval_runtime": 72.3477,
2375
+ "eval_samples_per_second": 36.988,
2376
+ "eval_steps_per_second": 2.322,
2377
+ "step": 3200
2378
+ },
2379
+ {
2380
+ "epoch": 3.3860759493670884,
2381
+ "grad_norm": 5.200326919555664,
2382
+ "learning_rate": 7.447089239055428e-06,
2383
+ "loss": 2.0072,
2384
+ "step": 3210
2385
+ },
2386
+ {
2387
+ "epoch": 3.3966244725738397,
2388
+ "grad_norm": 6.464207649230957,
2389
+ "learning_rate": 7.432595729431408e-06,
2390
+ "loss": 1.9955,
2391
+ "step": 3220
2392
+ },
2393
+ {
2394
+ "epoch": 3.4071729957805905,
2395
+ "grad_norm": 6.107498645782471,
2396
+ "learning_rate": 7.418075391886721e-06,
2397
+ "loss": 1.9306,
2398
+ "step": 3230
2399
+ },
2400
+ {
2401
+ "epoch": 3.4177215189873418,
2402
+ "grad_norm": 5.439554691314697,
2403
+ "learning_rate": 7.403528386559138e-06,
2404
+ "loss": 1.95,
2405
+ "step": 3240
2406
+ },
2407
+ {
2408
+ "epoch": 3.428270042194093,
2409
+ "grad_norm": 5.776865482330322,
2410
+ "learning_rate": 7.388954873880542e-06,
2411
+ "loss": 2.0344,
2412
+ "step": 3250
2413
+ },
2414
+ {
2415
+ "epoch": 3.438818565400844,
2416
+ "grad_norm": 5.6310224533081055,
2417
+ "learning_rate": 7.374355014575148e-06,
2418
+ "loss": 1.9635,
2419
+ "step": 3260
2420
+ },
2421
+ {
2422
+ "epoch": 3.449367088607595,
2423
+ "grad_norm": 6.140701770782471,
2424
+ "learning_rate": 7.359728969657734e-06,
2425
+ "loss": 2.0356,
2426
+ "step": 3270
2427
+ },
2428
+ {
2429
+ "epoch": 3.459915611814346,
2430
+ "grad_norm": 5.916216850280762,
2431
+ "learning_rate": 7.3450769004318714e-06,
2432
+ "loss": 2.0008,
2433
+ "step": 3280
2434
+ },
2435
+ {
2436
+ "epoch": 3.470464135021097,
2437
+ "grad_norm": 5.945311069488525,
2438
+ "learning_rate": 7.330398968488139e-06,
2439
+ "loss": 1.974,
2440
+ "step": 3290
2441
+ },
2442
+ {
2443
+ "epoch": 3.481012658227848,
2444
+ "grad_norm": 5.455053329467773,
2445
+ "learning_rate": 7.315695335702341e-06,
2446
+ "loss": 2.082,
2447
+ "step": 3300
2448
+ },
2449
+ {
2450
+ "epoch": 3.4915611814345993,
2451
+ "grad_norm": 6.291158199310303,
2452
+ "learning_rate": 7.300966164233725e-06,
2453
+ "loss": 2.0138,
2454
+ "step": 3310
2455
+ },
2456
+ {
2457
+ "epoch": 3.50210970464135,
2458
+ "grad_norm": 6.825329303741455,
2459
+ "learning_rate": 7.286211616523193e-06,
2460
+ "loss": 1.9847,
2461
+ "step": 3320
2462
+ },
2463
+ {
2464
+ "epoch": 3.5126582278481013,
2465
+ "grad_norm": 5.787904739379883,
2466
+ "learning_rate": 7.271431855291511e-06,
2467
+ "loss": 2.0,
2468
+ "step": 3330
2469
+ },
2470
+ {
2471
+ "epoch": 3.523206751054852,
2472
+ "grad_norm": 5.826028347015381,
2473
+ "learning_rate": 7.256627043537508e-06,
2474
+ "loss": 1.9622,
2475
+ "step": 3340
2476
+ },
2477
+ {
2478
+ "epoch": 3.5337552742616034,
2479
+ "grad_norm": 5.009519577026367,
2480
+ "learning_rate": 7.241797344536286e-06,
2481
+ "loss": 2.0115,
2482
+ "step": 3350
2483
+ },
2484
+ {
2485
+ "epoch": 3.5443037974683547,
2486
+ "grad_norm": 5.521949291229248,
2487
+ "learning_rate": 7.226942921837419e-06,
2488
+ "loss": 1.9554,
2489
+ "step": 3360
2490
+ },
2491
+ {
2492
+ "epoch": 3.5548523206751055,
2493
+ "grad_norm": 6.4602861404418945,
2494
+ "learning_rate": 7.212063939263139e-06,
2495
+ "loss": 2.0057,
2496
+ "step": 3370
2497
+ },
2498
+ {
2499
+ "epoch": 3.5654008438818563,
2500
+ "grad_norm": 5.9213690757751465,
2501
+ "learning_rate": 7.197160560906547e-06,
2502
+ "loss": 1.9691,
2503
+ "step": 3380
2504
+ },
2505
+ {
2506
+ "epoch": 3.5759493670886076,
2507
+ "grad_norm": 5.320401668548584,
2508
+ "learning_rate": 7.182232951129785e-06,
2509
+ "loss": 1.996,
2510
+ "step": 3390
2511
+ },
2512
+ {
2513
+ "epoch": 3.586497890295359,
2514
+ "grad_norm": 5.877796173095703,
2515
+ "learning_rate": 7.167281274562235e-06,
2516
+ "loss": 1.9061,
2517
+ "step": 3400
2518
+ },
2519
+ {
2520
+ "epoch": 3.586497890295359,
2521
+ "eval_loss": 2.194946765899658,
2522
+ "eval_runtime": 72.3435,
2523
+ "eval_samples_per_second": 36.99,
2524
+ "eval_steps_per_second": 2.322,
2525
+ "step": 3400
2526
+ },
2527
+ {
2528
+ "epoch": 3.5970464135021096,
2529
+ "grad_norm": 6.070914268493652,
2530
+ "learning_rate": 7.152305696098699e-06,
2531
+ "loss": 1.9963,
2532
+ "step": 3410
2533
+ },
2534
+ {
2535
+ "epoch": 3.607594936708861,
2536
+ "grad_norm": 6.039547443389893,
2537
+ "learning_rate": 7.137306380897585e-06,
2538
+ "loss": 1.9624,
2539
+ "step": 3420
2540
+ },
2541
+ {
2542
+ "epoch": 3.6181434599156117,
2543
+ "grad_norm": 5.91978645324707,
2544
+ "learning_rate": 7.122283494379076e-06,
2545
+ "loss": 2.0262,
2546
+ "step": 3430
2547
+ },
2548
+ {
2549
+ "epoch": 3.628691983122363,
2550
+ "grad_norm": 5.852320194244385,
2551
+ "learning_rate": 7.107237202223316e-06,
2552
+ "loss": 1.9985,
2553
+ "step": 3440
2554
+ },
2555
+ {
2556
+ "epoch": 3.6392405063291138,
2557
+ "grad_norm": 6.412103652954102,
2558
+ "learning_rate": 7.092167670368579e-06,
2559
+ "loss": 1.9653,
2560
+ "step": 3450
2561
+ },
2562
+ {
2563
+ "epoch": 3.649789029535865,
2564
+ "grad_norm": 6.605663776397705,
2565
+ "learning_rate": 7.0770750650094335e-06,
2566
+ "loss": 2.0706,
2567
+ "step": 3460
2568
+ },
2569
+ {
2570
+ "epoch": 3.6603375527426163,
2571
+ "grad_norm": 5.84267520904541,
2572
+ "learning_rate": 7.061959552594918e-06,
2573
+ "loss": 1.98,
2574
+ "step": 3470
2575
+ },
2576
+ {
2577
+ "epoch": 3.670886075949367,
2578
+ "grad_norm": 6.904423713684082,
2579
+ "learning_rate": 7.046821299826703e-06,
2580
+ "loss": 2.0789,
2581
+ "step": 3480
2582
+ },
2583
+ {
2584
+ "epoch": 3.681434599156118,
2585
+ "grad_norm": 5.740320205688477,
2586
+ "learning_rate": 7.03166047365725e-06,
2587
+ "loss": 1.92,
2588
+ "step": 3490
2589
+ },
2590
+ {
2591
+ "epoch": 3.691983122362869,
2592
+ "grad_norm": 6.543642044067383,
2593
+ "learning_rate": 7.016477241287969e-06,
2594
+ "loss": 2.0346,
2595
+ "step": 3500
2596
+ },
2597
+ {
2598
+ "epoch": 3.7025316455696204,
2599
+ "grad_norm": 5.7007293701171875,
2600
+ "learning_rate": 7.001271770167383e-06,
2601
+ "loss": 1.9209,
2602
+ "step": 3510
2603
+ },
2604
+ {
2605
+ "epoch": 3.7130801687763713,
2606
+ "grad_norm": 6.128006935119629,
2607
+ "learning_rate": 6.9860442279892686e-06,
2608
+ "loss": 2.0118,
2609
+ "step": 3520
2610
+ },
2611
+ {
2612
+ "epoch": 3.7236286919831225,
2613
+ "grad_norm": 6.305457592010498,
2614
+ "learning_rate": 6.970794782690817e-06,
2615
+ "loss": 1.9973,
2616
+ "step": 3530
2617
+ },
2618
+ {
2619
+ "epoch": 3.7341772151898733,
2620
+ "grad_norm": 6.509912490844727,
2621
+ "learning_rate": 6.95552360245078e-06,
2622
+ "loss": 2.0218,
2623
+ "step": 3540
2624
+ },
2625
+ {
2626
+ "epoch": 3.7447257383966246,
2627
+ "grad_norm": 6.929021835327148,
2628
+ "learning_rate": 6.940230855687612e-06,
2629
+ "loss": 1.9802,
2630
+ "step": 3550
2631
+ },
2632
+ {
2633
+ "epoch": 3.7552742616033754,
2634
+ "grad_norm": 6.215034484863281,
2635
+ "learning_rate": 6.924916711057611e-06,
2636
+ "loss": 1.9785,
2637
+ "step": 3560
2638
+ },
2639
+ {
2640
+ "epoch": 3.7658227848101267,
2641
+ "grad_norm": 6.629559516906738,
2642
+ "learning_rate": 6.909581337453067e-06,
2643
+ "loss": 2.0174,
2644
+ "step": 3570
2645
+ },
2646
+ {
2647
+ "epoch": 3.7763713080168775,
2648
+ "grad_norm": 5.6580610275268555,
2649
+ "learning_rate": 6.89422490400039e-06,
2650
+ "loss": 1.9229,
2651
+ "step": 3580
2652
+ },
2653
+ {
2654
+ "epoch": 3.7869198312236287,
2655
+ "grad_norm": 6.4606404304504395,
2656
+ "learning_rate": 6.878847580058251e-06,
2657
+ "loss": 1.9895,
2658
+ "step": 3590
2659
+ },
2660
+ {
2661
+ "epoch": 3.7974683544303796,
2662
+ "grad_norm": 6.211068153381348,
2663
+ "learning_rate": 6.863449535215711e-06,
2664
+ "loss": 2.0197,
2665
+ "step": 3600
2666
+ },
2667
+ {
2668
+ "epoch": 3.7974683544303796,
2669
+ "eval_loss": 2.1858973503112793,
2670
+ "eval_runtime": 72.397,
2671
+ "eval_samples_per_second": 36.963,
2672
+ "eval_steps_per_second": 2.321,
2673
+ "step": 3600
2674
+ },
2675
+ {
2676
+ "epoch": 3.808016877637131,
2677
+ "grad_norm": 8.330246925354004,
2678
+ "learning_rate": 6.848030939290354e-06,
2679
+ "loss": 1.9601,
2680
+ "step": 3610
2681
+ },
2682
+ {
2683
+ "epoch": 3.818565400843882,
2684
+ "grad_norm": 5.126586437225342,
2685
+ "learning_rate": 6.832591962326408e-06,
2686
+ "loss": 2.0912,
2687
+ "step": 3620
2688
+ },
2689
+ {
2690
+ "epoch": 3.829113924050633,
2691
+ "grad_norm": 5.562393665313721,
2692
+ "learning_rate": 6.817132774592877e-06,
2693
+ "loss": 1.9304,
2694
+ "step": 3630
2695
+ },
2696
+ {
2697
+ "epoch": 3.8396624472573837,
2698
+ "grad_norm": 5.8472161293029785,
2699
+ "learning_rate": 6.8016535465816595e-06,
2700
+ "loss": 1.9851,
2701
+ "step": 3640
2702
+ },
2703
+ {
2704
+ "epoch": 3.850210970464135,
2705
+ "grad_norm": 6.453331470489502,
2706
+ "learning_rate": 6.786154449005664e-06,
2707
+ "loss": 1.9529,
2708
+ "step": 3650
2709
+ },
2710
+ {
2711
+ "epoch": 3.8607594936708862,
2712
+ "grad_norm": 6.4626007080078125,
2713
+ "learning_rate": 6.770635652796939e-06,
2714
+ "loss": 2.0079,
2715
+ "step": 3660
2716
+ },
2717
+ {
2718
+ "epoch": 3.871308016877637,
2719
+ "grad_norm": 5.923832893371582,
2720
+ "learning_rate": 6.755097329104774e-06,
2721
+ "loss": 2.0045,
2722
+ "step": 3670
2723
+ },
2724
+ {
2725
+ "epoch": 3.8818565400843883,
2726
+ "grad_norm": 7.285524368286133,
2727
+ "learning_rate": 6.739539649293817e-06,
2728
+ "loss": 2.0568,
2729
+ "step": 3680
2730
+ },
2731
+ {
2732
+ "epoch": 3.892405063291139,
2733
+ "grad_norm": 5.927515983581543,
2734
+ "learning_rate": 6.723962784942187e-06,
2735
+ "loss": 2.0319,
2736
+ "step": 3690
2737
+ },
2738
+ {
2739
+ "epoch": 3.9029535864978904,
2740
+ "grad_norm": 8.093863487243652,
2741
+ "learning_rate": 6.708366907839583e-06,
2742
+ "loss": 2.0398,
2743
+ "step": 3700
2744
+ },
2745
+ {
2746
+ "epoch": 3.913502109704641,
2747
+ "grad_norm": 6.676210403442383,
2748
+ "learning_rate": 6.692752189985382e-06,
2749
+ "loss": 2.0859,
2750
+ "step": 3710
2751
+ },
2752
+ {
2753
+ "epoch": 3.9240506329113924,
2754
+ "grad_norm": 6.668973445892334,
2755
+ "learning_rate": 6.677118803586747e-06,
2756
+ "loss": 2.0269,
2757
+ "step": 3720
2758
+ },
2759
+ {
2760
+ "epoch": 3.9345991561181437,
2761
+ "grad_norm": 5.94500732421875,
2762
+ "learning_rate": 6.661466921056729e-06,
2763
+ "loss": 1.9676,
2764
+ "step": 3730
2765
+ },
2766
+ {
2767
+ "epoch": 3.9451476793248945,
2768
+ "grad_norm": 7.243645191192627,
2769
+ "learning_rate": 6.645796715012363e-06,
2770
+ "loss": 2.0383,
2771
+ "step": 3740
2772
+ },
2773
+ {
2774
+ "epoch": 3.9556962025316453,
2775
+ "grad_norm": 6.2618303298950195,
2776
+ "learning_rate": 6.630108358272767e-06,
2777
+ "loss": 1.9989,
2778
+ "step": 3750
2779
+ },
2780
+ {
2781
+ "epoch": 3.9662447257383966,
2782
+ "grad_norm": 7.027988433837891,
2783
+ "learning_rate": 6.614402023857231e-06,
2784
+ "loss": 2.0059,
2785
+ "step": 3760
2786
+ },
2787
+ {
2788
+ "epoch": 3.976793248945148,
2789
+ "grad_norm": 6.4713664054870605,
2790
+ "learning_rate": 6.598677884983315e-06,
2791
+ "loss": 2.0145,
2792
+ "step": 3770
2793
+ },
2794
+ {
2795
+ "epoch": 3.9873417721518987,
2796
+ "grad_norm": 6.756292343139648,
2797
+ "learning_rate": 6.582936115064934e-06,
2798
+ "loss": 1.9645,
2799
+ "step": 3780
2800
+ },
2801
+ {
2802
+ "epoch": 3.99789029535865,
2803
+ "grad_norm": 6.569133758544922,
2804
+ "learning_rate": 6.567176887710447e-06,
2805
+ "loss": 1.9966,
2806
+ "step": 3790
2807
+ },
2808
+ {
2809
+ "epoch": 4.008438818565401,
2810
+ "grad_norm": 7.79607629776001,
2811
+ "learning_rate": 6.551400376720744e-06,
2812
+ "loss": 1.9671,
2813
+ "step": 3800
2814
+ },
2815
+ {
2816
+ "epoch": 4.008438818565401,
2817
+ "eval_loss": 2.191293954849243,
2818
+ "eval_runtime": 73.3051,
2819
+ "eval_samples_per_second": 36.505,
2820
+ "eval_steps_per_second": 2.292,
2821
+ "step": 3800
2822
+ },
2823
+ {
2824
+ "epoch": 4.018987341772152,
2825
+ "grad_norm": 5.740083694458008,
2826
+ "learning_rate": 6.5356067560873275e-06,
2827
+ "loss": 1.9533,
2828
+ "step": 3810
2829
+ },
2830
+ {
2831
+ "epoch": 4.029535864978903,
2832
+ "grad_norm": 7.8538408279418945,
2833
+ "learning_rate": 6.5197961999903925e-06,
2834
+ "loss": 1.9217,
2835
+ "step": 3820
2836
+ },
2837
+ {
2838
+ "epoch": 4.040084388185654,
2839
+ "grad_norm": 7.259963512420654,
2840
+ "learning_rate": 6.503968882796909e-06,
2841
+ "loss": 1.8411,
2842
+ "step": 3830
2843
+ },
2844
+ {
2845
+ "epoch": 4.050632911392405,
2846
+ "grad_norm": 7.174349308013916,
2847
+ "learning_rate": 6.4881249790586975e-06,
2848
+ "loss": 1.9018,
2849
+ "step": 3840
2850
+ },
2851
+ {
2852
+ "epoch": 4.061181434599156,
2853
+ "grad_norm": 7.046865940093994,
2854
+ "learning_rate": 6.472264663510501e-06,
2855
+ "loss": 1.9452,
2856
+ "step": 3850
2857
+ },
2858
+ {
2859
+ "epoch": 4.071729957805907,
2860
+ "grad_norm": 6.623846530914307,
2861
+ "learning_rate": 6.45638811106806e-06,
2862
+ "loss": 1.8543,
2863
+ "step": 3860
2864
+ },
2865
+ {
2866
+ "epoch": 4.082278481012658,
2867
+ "grad_norm": 7.323060512542725,
2868
+ "learning_rate": 6.440495496826189e-06,
2869
+ "loss": 1.841,
2870
+ "step": 3870
2871
+ },
2872
+ {
2873
+ "epoch": 4.0928270042194095,
2874
+ "grad_norm": 5.619576930999756,
2875
+ "learning_rate": 6.424586996056834e-06,
2876
+ "loss": 1.957,
2877
+ "step": 3880
2878
+ },
2879
+ {
2880
+ "epoch": 4.10337552742616,
2881
+ "grad_norm": 7.116107940673828,
2882
+ "learning_rate": 6.408662784207149e-06,
2883
+ "loss": 1.9459,
2884
+ "step": 3890
2885
+ },
2886
+ {
2887
+ "epoch": 4.113924050632911,
2888
+ "grad_norm": 7.9187331199646,
2889
+ "learning_rate": 6.392723036897559e-06,
2890
+ "loss": 1.9138,
2891
+ "step": 3900
2892
+ },
2893
+ {
2894
+ "epoch": 4.124472573839663,
2895
+ "grad_norm": 8.708395004272461,
2896
+ "learning_rate": 6.376767929919819e-06,
2897
+ "loss": 1.9134,
2898
+ "step": 3910
2899
+ },
2900
+ {
2901
+ "epoch": 4.135021097046414,
2902
+ "grad_norm": 7.498927116394043,
2903
+ "learning_rate": 6.36079763923508e-06,
2904
+ "loss": 1.8623,
2905
+ "step": 3920
2906
+ },
2907
+ {
2908
+ "epoch": 4.1455696202531644,
2909
+ "grad_norm": 7.128155708312988,
2910
+ "learning_rate": 6.344812340971948e-06,
2911
+ "loss": 1.9377,
2912
+ "step": 3930
2913
+ },
2914
+ {
2915
+ "epoch": 4.156118143459915,
2916
+ "grad_norm": 6.856624126434326,
2917
+ "learning_rate": 6.328812211424539e-06,
2918
+ "loss": 1.9371,
2919
+ "step": 3940
2920
+ },
2921
+ {
2922
+ "epoch": 4.166666666666667,
2923
+ "grad_norm": 6.839701175689697,
2924
+ "learning_rate": 6.312797427050533e-06,
2925
+ "loss": 1.9375,
2926
+ "step": 3950
2927
+ },
2928
+ {
2929
+ "epoch": 4.177215189873418,
2930
+ "grad_norm": 7.109929084777832,
2931
+ "learning_rate": 6.296768164469236e-06,
2932
+ "loss": 1.8356,
2933
+ "step": 3960
2934
+ },
2935
+ {
2936
+ "epoch": 4.187763713080169,
2937
+ "grad_norm": 8.737268447875977,
2938
+ "learning_rate": 6.2807246004596265e-06,
2939
+ "loss": 1.9655,
2940
+ "step": 3970
2941
+ },
2942
+ {
2943
+ "epoch": 4.198312236286919,
2944
+ "grad_norm": 6.807742595672607,
2945
+ "learning_rate": 6.264666911958404e-06,
2946
+ "loss": 1.886,
2947
+ "step": 3980
2948
+ },
2949
+ {
2950
+ "epoch": 4.208860759493671,
2951
+ "grad_norm": 8.093104362487793,
2952
+ "learning_rate": 6.248595276058041e-06,
2953
+ "loss": 1.9565,
2954
+ "step": 3990
2955
+ },
2956
+ {
2957
+ "epoch": 4.219409282700422,
2958
+ "grad_norm": 6.961418151855469,
2959
+ "learning_rate": 6.232509870004831e-06,
2960
+ "loss": 1.8848,
2961
+ "step": 4000
2962
+ },
2963
+ {
2964
+ "epoch": 4.219409282700422,
2965
+ "eval_loss": 2.211486577987671,
2966
+ "eval_runtime": 72.3262,
2967
+ "eval_samples_per_second": 36.999,
2968
+ "eval_steps_per_second": 2.323,
2969
+ "step": 4000
2970
+ },
2971
+ {
2972
+ "epoch": 4.229957805907173,
2973
+ "grad_norm": 9.551440238952637,
2974
+ "learning_rate": 6.216410871196927e-06,
2975
+ "loss": 1.9088,
2976
+ "step": 4010
2977
+ },
2978
+ {
2979
+ "epoch": 4.2405063291139244,
2980
+ "grad_norm": 8.6002197265625,
2981
+ "learning_rate": 6.200298457182396e-06,
2982
+ "loss": 1.994,
2983
+ "step": 4020
2984
+ },
2985
+ {
2986
+ "epoch": 4.251054852320675,
2987
+ "grad_norm": 6.9018402099609375,
2988
+ "learning_rate": 6.184172805657254e-06,
2989
+ "loss": 1.8591,
2990
+ "step": 4030
2991
+ },
2992
+ {
2993
+ "epoch": 4.261603375527426,
2994
+ "grad_norm": 7.242821216583252,
2995
+ "learning_rate": 6.168034094463501e-06,
2996
+ "loss": 2.0,
2997
+ "step": 4040
2998
+ },
2999
+ {
3000
+ "epoch": 4.272151898734177,
3001
+ "grad_norm": 7.388881683349609,
3002
+ "learning_rate": 6.151882501587171e-06,
3003
+ "loss": 1.9282,
3004
+ "step": 4050
3005
+ },
3006
+ {
3007
+ "epoch": 4.282700421940929,
3008
+ "grad_norm": 7.072291851043701,
3009
+ "learning_rate": 6.135718205156363e-06,
3010
+ "loss": 1.9579,
3011
+ "step": 4060
3012
+ },
3013
+ {
3014
+ "epoch": 4.293248945147679,
3015
+ "grad_norm": 7.324854373931885,
3016
+ "learning_rate": 6.119541383439278e-06,
3017
+ "loss": 1.9188,
3018
+ "step": 4070
3019
+ },
3020
+ {
3021
+ "epoch": 4.30379746835443,
3022
+ "grad_norm": 7.626977920532227,
3023
+ "learning_rate": 6.103352214842252e-06,
3024
+ "loss": 1.9007,
3025
+ "step": 4080
3026
+ },
3027
+ {
3028
+ "epoch": 4.314345991561181,
3029
+ "grad_norm": 7.602935314178467,
3030
+ "learning_rate": 6.087150877907786e-06,
3031
+ "loss": 1.8915,
3032
+ "step": 4090
3033
+ },
3034
+ {
3035
+ "epoch": 4.324894514767933,
3036
+ "grad_norm": 8.161288261413574,
3037
+ "learning_rate": 6.070937551312583e-06,
3038
+ "loss": 1.9291,
3039
+ "step": 4100
3040
+ },
3041
+ {
3042
+ "epoch": 4.3354430379746836,
3043
+ "grad_norm": 9.579643249511719,
3044
+ "learning_rate": 6.054712413865572e-06,
3045
+ "loss": 1.966,
3046
+ "step": 4110
3047
+ },
3048
+ {
3049
+ "epoch": 4.345991561181434,
3050
+ "grad_norm": 6.629637718200684,
3051
+ "learning_rate": 6.038475644505942e-06,
3052
+ "loss": 1.8704,
3053
+ "step": 4120
3054
+ },
3055
+ {
3056
+ "epoch": 4.356540084388186,
3057
+ "grad_norm": 8.050599098205566,
3058
+ "learning_rate": 6.02222742230116e-06,
3059
+ "loss": 1.9687,
3060
+ "step": 4130
3061
+ },
3062
+ {
3063
+ "epoch": 4.367088607594937,
3064
+ "grad_norm": 6.470215797424316,
3065
+ "learning_rate": 6.005967926445002e-06,
3066
+ "loss": 1.9371,
3067
+ "step": 4140
3068
+ },
3069
+ {
3070
+ "epoch": 4.377637130801688,
3071
+ "grad_norm": 7.582352161407471,
3072
+ "learning_rate": 5.989697336255578e-06,
3073
+ "loss": 1.9052,
3074
+ "step": 4150
3075
+ },
3076
+ {
3077
+ "epoch": 4.3881856540084385,
3078
+ "grad_norm": 6.557480335235596,
3079
+ "learning_rate": 5.97341583117335e-06,
3080
+ "loss": 1.9683,
3081
+ "step": 4160
3082
+ },
3083
+ {
3084
+ "epoch": 4.39873417721519,
3085
+ "grad_norm": 7.335679531097412,
3086
+ "learning_rate": 5.957123590759156e-06,
3087
+ "loss": 1.9361,
3088
+ "step": 4170
3089
+ },
3090
+ {
3091
+ "epoch": 4.409282700421941,
3092
+ "grad_norm": 7.878922939300537,
3093
+ "learning_rate": 5.940820794692228e-06,
3094
+ "loss": 1.8536,
3095
+ "step": 4180
3096
+ },
3097
+ {
3098
+ "epoch": 4.419831223628692,
3099
+ "grad_norm": 8.240972518920898,
3100
+ "learning_rate": 5.924507622768212e-06,
3101
+ "loss": 1.881,
3102
+ "step": 4190
3103
+ },
3104
+ {
3105
+ "epoch": 4.430379746835443,
3106
+ "grad_norm": 6.894169330596924,
3107
+ "learning_rate": 5.908184254897183e-06,
3108
+ "loss": 1.9566,
3109
+ "step": 4200
3110
+ },
3111
+ {
3112
+ "epoch": 4.430379746835443,
3113
+ "eval_loss": 2.2033305168151855,
3114
+ "eval_runtime": 72.2758,
3115
+ "eval_samples_per_second": 37.025,
3116
+ "eval_steps_per_second": 2.324,
3117
+ "step": 4200
3118
+ },
3119
+ {
3120
+ "epoch": 4.440928270042194,
3121
+ "grad_norm": 6.864879131317139,
3122
+ "learning_rate": 5.891850871101663e-06,
3123
+ "loss": 1.9219,
3124
+ "step": 4210
3125
+ },
3126
+ {
3127
+ "epoch": 4.451476793248945,
3128
+ "grad_norm": 7.902835845947266,
3129
+ "learning_rate": 5.875507651514636e-06,
3130
+ "loss": 1.8759,
3131
+ "step": 4220
3132
+ },
3133
+ {
3134
+ "epoch": 4.462025316455696,
3135
+ "grad_norm": 6.560957431793213,
3136
+ "learning_rate": 5.859154776377559e-06,
3137
+ "loss": 1.9174,
3138
+ "step": 4230
3139
+ },
3140
+ {
3141
+ "epoch": 4.472573839662447,
3142
+ "grad_norm": 7.267096996307373,
3143
+ "learning_rate": 5.842792426038373e-06,
3144
+ "loss": 1.9557,
3145
+ "step": 4240
3146
+ },
3147
+ {
3148
+ "epoch": 4.4831223628691985,
3149
+ "grad_norm": 7.733441352844238,
3150
+ "learning_rate": 5.826420780949521e-06,
3151
+ "loss": 1.8834,
3152
+ "step": 4250
3153
+ },
3154
+ {
3155
+ "epoch": 4.493670886075949,
3156
+ "grad_norm": 7.876978397369385,
3157
+ "learning_rate": 5.8100400216659505e-06,
3158
+ "loss": 1.9299,
3159
+ "step": 4260
3160
+ },
3161
+ {
3162
+ "epoch": 4.5042194092827,
3163
+ "grad_norm": 6.604508876800537,
3164
+ "learning_rate": 5.793650328843127e-06,
3165
+ "loss": 1.8945,
3166
+ "step": 4270
3167
+ },
3168
+ {
3169
+ "epoch": 4.514767932489452,
3170
+ "grad_norm": 7.7010369300842285,
3171
+ "learning_rate": 5.7772518832350345e-06,
3172
+ "loss": 1.832,
3173
+ "step": 4280
3174
+ },
3175
+ {
3176
+ "epoch": 4.525316455696203,
3177
+ "grad_norm": 7.013700485229492,
3178
+ "learning_rate": 5.760844865692196e-06,
3179
+ "loss": 1.8611,
3180
+ "step": 4290
3181
+ },
3182
+ {
3183
+ "epoch": 4.5358649789029535,
3184
+ "grad_norm": 9.031999588012695,
3185
+ "learning_rate": 5.744429457159661e-06,
3186
+ "loss": 1.9162,
3187
+ "step": 4300
3188
+ },
3189
+ {
3190
+ "epoch": 4.546413502109704,
3191
+ "grad_norm": 7.503841400146484,
3192
+ "learning_rate": 5.728005838675026e-06,
3193
+ "loss": 1.9886,
3194
+ "step": 4310
3195
+ },
3196
+ {
3197
+ "epoch": 4.556962025316456,
3198
+ "grad_norm": 7.301093578338623,
3199
+ "learning_rate": 5.711574191366427e-06,
3200
+ "loss": 1.936,
3201
+ "step": 4320
3202
+ },
3203
+ {
3204
+ "epoch": 4.567510548523207,
3205
+ "grad_norm": 9.621993064880371,
3206
+ "learning_rate": 5.695134696450548e-06,
3207
+ "loss": 1.95,
3208
+ "step": 4330
3209
+ },
3210
+ {
3211
+ "epoch": 4.578059071729958,
3212
+ "grad_norm": 6.9891767501831055,
3213
+ "learning_rate": 5.6786875352306205e-06,
3214
+ "loss": 1.9347,
3215
+ "step": 4340
3216
+ },
3217
+ {
3218
+ "epoch": 4.588607594936709,
3219
+ "grad_norm": 8.195670127868652,
3220
+ "learning_rate": 5.662232889094423e-06,
3221
+ "loss": 1.9793,
3222
+ "step": 4350
3223
+ },
3224
+ {
3225
+ "epoch": 4.59915611814346,
3226
+ "grad_norm": 7.516731262207031,
3227
+ "learning_rate": 5.645770939512284e-06,
3228
+ "loss": 1.9325,
3229
+ "step": 4360
3230
+ },
3231
+ {
3232
+ "epoch": 4.609704641350211,
3233
+ "grad_norm": 8.283931732177734,
3234
+ "learning_rate": 5.629301868035072e-06,
3235
+ "loss": 1.9616,
3236
+ "step": 4370
3237
+ },
3238
+ {
3239
+ "epoch": 4.620253164556962,
3240
+ "grad_norm": 7.950915336608887,
3241
+ "learning_rate": 5.6128258562922065e-06,
3242
+ "loss": 1.919,
3243
+ "step": 4380
3244
+ },
3245
+ {
3246
+ "epoch": 4.630801687763713,
3247
+ "grad_norm": 8.076128959655762,
3248
+ "learning_rate": 5.596343085989644e-06,
3249
+ "loss": 1.912,
3250
+ "step": 4390
3251
+ },
3252
+ {
3253
+ "epoch": 4.641350210970464,
3254
+ "grad_norm": 7.961559295654297,
3255
+ "learning_rate": 5.579853738907878e-06,
3256
+ "loss": 1.9092,
3257
+ "step": 4400
3258
+ },
3259
+ {
3260
+ "epoch": 4.641350210970464,
3261
+ "eval_loss": 2.200444459915161,
3262
+ "eval_runtime": 72.3626,
3263
+ "eval_samples_per_second": 36.98,
3264
+ "eval_steps_per_second": 2.322,
3265
+ "step": 4400
3266
+ },
3267
+ {
3268
+ "epoch": 4.651898734177215,
3269
+ "grad_norm": 7.390754699707031,
3270
+ "learning_rate": 5.563357996899937e-06,
3271
+ "loss": 1.8476,
3272
+ "step": 4410
3273
+ },
3274
+ {
3275
+ "epoch": 4.662447257383966,
3276
+ "grad_norm": 7.94099235534668,
3277
+ "learning_rate": 5.546856041889374e-06,
3278
+ "loss": 1.9666,
3279
+ "step": 4420
3280
+ },
3281
+ {
3282
+ "epoch": 4.672995780590718,
3283
+ "grad_norm": 7.32828426361084,
3284
+ "learning_rate": 5.530348055868258e-06,
3285
+ "loss": 1.8731,
3286
+ "step": 4430
3287
+ },
3288
+ {
3289
+ "epoch": 4.6835443037974684,
3290
+ "grad_norm": 7.447146892547607,
3291
+ "learning_rate": 5.513834220895179e-06,
3292
+ "loss": 1.9457,
3293
+ "step": 4440
3294
+ },
3295
+ {
3296
+ "epoch": 4.694092827004219,
3297
+ "grad_norm": 7.633859634399414,
3298
+ "learning_rate": 5.49731471909323e-06,
3299
+ "loss": 1.9227,
3300
+ "step": 4450
3301
+ },
3302
+ {
3303
+ "epoch": 4.70464135021097,
3304
+ "grad_norm": 7.893139839172363,
3305
+ "learning_rate": 5.4807897326479935e-06,
3306
+ "loss": 1.96,
3307
+ "step": 4460
3308
+ },
3309
+ {
3310
+ "epoch": 4.715189873417722,
3311
+ "grad_norm": 7.659285068511963,
3312
+ "learning_rate": 5.464259443805549e-06,
3313
+ "loss": 1.899,
3314
+ "step": 4470
3315
+ },
3316
+ {
3317
+ "epoch": 4.725738396624473,
3318
+ "grad_norm": 7.269672393798828,
3319
+ "learning_rate": 5.447724034870451e-06,
3320
+ "loss": 1.9714,
3321
+ "step": 4480
3322
+ },
3323
+ {
3324
+ "epoch": 4.736286919831223,
3325
+ "grad_norm": 9.073884963989258,
3326
+ "learning_rate": 5.431183688203716e-06,
3327
+ "loss": 1.8903,
3328
+ "step": 4490
3329
+ },
3330
+ {
3331
+ "epoch": 4.746835443037975,
3332
+ "grad_norm": 8.139067649841309,
3333
+ "learning_rate": 5.414638586220824e-06,
3334
+ "loss": 1.9938,
3335
+ "step": 4500
3336
+ },
3337
+ {
3338
+ "epoch": 4.757383966244726,
3339
+ "grad_norm": 7.44675350189209,
3340
+ "learning_rate": 5.398088911389692e-06,
3341
+ "loss": 1.9624,
3342
+ "step": 4510
3343
+ },
3344
+ {
3345
+ "epoch": 4.767932489451477,
3346
+ "grad_norm": 6.38553524017334,
3347
+ "learning_rate": 5.381534846228673e-06,
3348
+ "loss": 1.899,
3349
+ "step": 4520
3350
+ },
3351
+ {
3352
+ "epoch": 4.7784810126582276,
3353
+ "grad_norm": 6.534326076507568,
3354
+ "learning_rate": 5.364976573304538e-06,
3355
+ "loss": 1.9361,
3356
+ "step": 4530
3357
+ },
3358
+ {
3359
+ "epoch": 4.789029535864979,
3360
+ "grad_norm": 8.395467758178711,
3361
+ "learning_rate": 5.348414275230464e-06,
3362
+ "loss": 1.9407,
3363
+ "step": 4540
3364
+ },
3365
+ {
3366
+ "epoch": 4.79957805907173,
3367
+ "grad_norm": 8.655330657958984,
3368
+ "learning_rate": 5.331848134664017e-06,
3369
+ "loss": 1.93,
3370
+ "step": 4550
3371
+ },
3372
+ {
3373
+ "epoch": 4.810126582278481,
3374
+ "grad_norm": 8.28433895111084,
3375
+ "learning_rate": 5.315278334305143e-06,
3376
+ "loss": 1.9085,
3377
+ "step": 4560
3378
+ },
3379
+ {
3380
+ "epoch": 4.820675105485232,
3381
+ "grad_norm": 8.20410442352295,
3382
+ "learning_rate": 5.298705056894145e-06,
3383
+ "loss": 1.8855,
3384
+ "step": 4570
3385
+ },
3386
+ {
3387
+ "epoch": 4.831223628691983,
3388
+ "grad_norm": 6.601258754730225,
3389
+ "learning_rate": 5.2821284852096835e-06,
3390
+ "loss": 1.9646,
3391
+ "step": 4580
3392
+ },
3393
+ {
3394
+ "epoch": 4.841772151898734,
3395
+ "grad_norm": 7.801843166351318,
3396
+ "learning_rate": 5.2655488020667365e-06,
3397
+ "loss": 1.9374,
3398
+ "step": 4590
3399
+ },
3400
+ {
3401
+ "epoch": 4.852320675105485,
3402
+ "grad_norm": 7.910266876220703,
3403
+ "learning_rate": 5.248966190314604e-06,
3404
+ "loss": 1.8674,
3405
+ "step": 4600
3406
+ },
3407
+ {
3408
+ "epoch": 4.852320675105485,
3409
+ "eval_loss": 2.2033960819244385,
3410
+ "eval_runtime": 72.336,
3411
+ "eval_samples_per_second": 36.994,
3412
+ "eval_steps_per_second": 2.322,
3413
+ "step": 4600
3414
+ },
3415
+ {
3416
+ "epoch": 4.852320675105485,
3417
+ "step": 4600,
3418
+ "total_flos": 2.1607649914755482e+18,
3419
+ "train_loss": 2.1172233917402186,
3420
+ "train_runtime": 8185.4824,
3421
+ "train_samples_per_second": 18.521,
3422
+ "train_steps_per_second": 1.158
3423
+ }
3424
+ ],
3425
+ "logging_steps": 10,
3426
+ "max_steps": 9480,
3427
+ "num_input_tokens_seen": 0,
3428
+ "num_train_epochs": 10,
3429
+ "save_steps": 1000,
3430
+ "total_flos": 2.1607649914755482e+18,
3431
+ "train_batch_size": 8,
3432
+ "trial_name": null,
3433
+ "trial_params": null
3434
+ }
llama3_8b_peft/topical_chat/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8008082f91afdaf489a6993caa4264d3aea8f16dc14831953940cbec9fe629d
3
+ size 5176
llama3_8b_peft/topical_chat/training_eval_loss.png ADDED
llama3_8b_peft/topical_chat/training_loss.png ADDED
llama3_8b_peft/unit_conversion/README.md ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: unit_conversion_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # unit_conversion_no_sys
18
+
19
+ This model is a fine-tuned version of the base model loaded from the local path `/data1/model/llama3/unsloth/Llama3-8b` on the unit_conversion_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.4978
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 5e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: cosine
46
+ - lr_scheduler_warmup_steps: 20
47
+ - num_epochs: 5.0
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss |
52
+ |:-------------:|:------:|:----:|:---------------:|
53
+ | 0.9027 | 0.0440 | 100 | 0.7020 |
54
+ | 0.2938 | 0.0879 | 200 | 0.6444 |
55
+ | 0.4661 | 0.1319 | 300 | 0.6325 |
56
+ | 0.3919 | 0.1758 | 400 | 0.6242 |
57
+ | 0.7109 | 0.2198 | 500 | 0.5902 |
58
+ | 0.6498 | 0.2637 | 600 | 0.5745 |
59
+ | 0.5876 | 0.3077 | 700 | 0.5662 |
60
+ | 0.6305 | 0.3516 | 800 | 0.5663 |
61
+ | 0.5547 | 0.3956 | 900 | 0.5513 |
62
+ | 0.5664 | 0.4396 | 1000 | 0.5478 |
63
+ | 0.3077 | 0.4835 | 1100 | 0.5378 |
64
+ | 0.2881 | 0.5275 | 1200 | 0.5418 |
65
+ | 0.4763 | 0.5714 | 1300 | 0.5294 |
66
+ | 0.6231 | 0.6154 | 1400 | 0.5200 |
67
+ | 0.4772 | 0.6593 | 1500 | 0.5204 |
68
+ | 0.7734 | 0.7033 | 1600 | 0.5234 |
69
+ | 0.5392 | 0.7473 | 1700 | 0.5196 |
70
+ | 0.2517 | 0.7912 | 1800 | 0.5174 |
71
+ | 0.5605 | 0.8352 | 1900 | 0.5051 |
72
+ | 0.8864 | 0.8791 | 2000 | 0.5247 |
73
+ | 0.4695 | 0.9231 | 2100 | 0.5049 |
74
+ | 0.3266 | 0.9670 | 2200 | 0.5186 |
75
+ | 0.4204 | 1.0110 | 2300 | 0.5013 |
76
+ | 0.6305 | 1.0549 | 2400 | 0.5093 |
77
+ | 0.4638 | 1.0989 | 2500 | 0.5108 |
78
+ | 0.2316 | 1.1429 | 2600 | 0.4978 |
79
+ | 0.4178 | 1.1868 | 2700 | 0.5096 |
80
+ | 0.3317 | 1.2308 | 2800 | 0.5018 |
81
+ | 0.5847 | 1.2747 | 2900 | 0.5059 |
82
+
83
+
84
+ ### Framework versions
85
+
86
+ - PEFT 0.10.0
87
+ - Transformers 4.40.0
88
+ - Pytorch 2.2.1
89
+ - Datasets 2.18.0
90
+ - Tokenizers 0.19.1
llama3_8b_peft/unit_conversion/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "down_proj",
24
+ "v_proj",
25
+ "up_proj",
26
+ "o_proj",
27
+ "gate_proj",
28
+ "q_proj",
29
+ "k_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/unit_conversion/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de90ec5dcb0b771f633a197793ae17f3d5f55f04ba8a2747648c51f15caf7e65
3
+ size 83945296
llama3_8b_peft/unit_conversion/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.2747252747252746,
3
+ "eval_loss": 0.497779905796051,
4
+ "eval_runtime": 11.4488,
5
+ "eval_samples_per_second": 83.677,
6
+ "eval_steps_per_second": 10.481,
7
+ "total_flos": 1.3202023431477658e+17,
8
+ "train_loss": 0.5084886488010143,
9
+ "train_runtime": 1223.9849,
10
+ "train_samples_per_second": 74.319,
11
+ "train_steps_per_second": 9.293
12
+ }
llama3_8b_peft/unit_conversion/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.2747252747252746,
3
+ "eval_loss": 0.497779905796051,
4
+ "eval_runtime": 11.4488,
5
+ "eval_samples_per_second": 83.677,
6
+ "eval_steps_per_second": 10.481
7
+ }
llama3_8b_peft/unit_conversion/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/unit_conversion/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/unit_conversion/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }