ParagonLight committed on
Commit
791da23
·
1 Parent(s): 60bd5ed

update loras

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. llama3_8b_peft/cnn_dailymail/README.md +79 -0
  2. llama3_8b_peft/cnn_dailymail/adapter_config.json +34 -0
  3. llama3_8b_peft/cnn_dailymail/adapter_model.safetensors +3 -0
  4. llama3_8b_peft/cnn_dailymail/all_results.json +12 -0
  5. llama3_8b_peft/cnn_dailymail/eval_results.json +7 -0
  6. llama3_8b_peft/cnn_dailymail/special_tokens_map.json +17 -0
  7. llama3_8b_peft/cnn_dailymail/tokenizer.json +0 -0
  8. llama3_8b_peft/cnn_dailymail/tokenizer_config.json +2065 -0
  9. llama3_8b_peft/cnn_dailymail/train_results.json +8 -0
  10. llama3_8b_peft/cnn_dailymail/trainer_log.jsonl +296 -0
  11. llama3_8b_peft/cnn_dailymail/trainer_state.json +2102 -0
  12. llama3_8b_peft/cnn_dailymail/training_args.bin +3 -0
  13. llama3_8b_peft/cnn_dailymail/training_eval_loss.png +0 -0
  14. llama3_8b_peft/cnn_dailymail/training_loss.png +0 -0
  15. llama3_8b_peft/goal_step_wikihow/README.md +68 -0
  16. llama3_8b_peft/goal_step_wikihow/adapter_config.json +34 -0
  17. llama3_8b_peft/goal_step_wikihow/adapter_model.safetensors +3 -0
  18. llama3_8b_peft/goal_step_wikihow/all_results.json +12 -0
  19. llama3_8b_peft/goal_step_wikihow/eval_results.json +7 -0
  20. llama3_8b_peft/goal_step_wikihow/special_tokens_map.json +17 -0
  21. llama3_8b_peft/goal_step_wikihow/tokenizer.json +0 -0
  22. llama3_8b_peft/goal_step_wikihow/tokenizer_config.json +2065 -0
  23. llama3_8b_peft/goal_step_wikihow/train_results.json +8 -0
  24. llama3_8b_peft/goal_step_wikihow/trainer_log.jsonl +79 -0
  25. llama3_8b_peft/goal_step_wikihow/trainer_state.json +576 -0
  26. llama3_8b_peft/goal_step_wikihow/training_args.bin +3 -0
  27. llama3_8b_peft/goal_step_wikihow/training_eval_loss.png +0 -0
  28. llama3_8b_peft/goal_step_wikihow/training_loss.png +0 -0
  29. llama3_8b_peft/gsm8k/README.md +79 -0
  30. llama3_8b_peft/gsm8k/adapter_config.json +34 -0
  31. llama3_8b_peft/gsm8k/adapter_model.safetensors +3 -0
  32. llama3_8b_peft/gsm8k/all_results.json +12 -0
  33. llama3_8b_peft/gsm8k/eval_results.json +7 -0
  34. llama3_8b_peft/gsm8k/special_tokens_map.json +17 -0
  35. llama3_8b_peft/gsm8k/tokenizer.json +0 -0
  36. llama3_8b_peft/gsm8k/tokenizer_config.json +2065 -0
  37. llama3_8b_peft/gsm8k/train_results.json +8 -0
  38. llama3_8b_peft/gsm8k/trainer_log.jsonl +296 -0
  39. llama3_8b_peft/gsm8k/trainer_state.json +2102 -0
  40. llama3_8b_peft/gsm8k/training_args.bin +3 -0
  41. llama3_8b_peft/gsm8k/training_eval_loss.png +0 -0
  42. llama3_8b_peft/gsm8k/training_loss.png +0 -0
  43. llama3_8b_peft/logical_deduction/README.md +72 -0
  44. llama3_8b_peft/logical_deduction/adapter_config.json +34 -0
  45. llama3_8b_peft/logical_deduction/adapter_model.safetensors +3 -0
  46. llama3_8b_peft/logical_deduction/all_results.json +12 -0
  47. llama3_8b_peft/logical_deduction/eval_results.json +7 -0
  48. llama3_8b_peft/logical_deduction/special_tokens_map.json +17 -0
  49. llama3_8b_peft/logical_deduction/tokenizer.json +0 -0
  50. llama3_8b_peft/logical_deduction/tokenizer_config.json +2065 -0
llama3_8b_peft/cnn_dailymail/README.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: cnn_dailymail_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # cnn_dailymail_no_sys
18
+
19
+ This model is a fine-tuned version of the base model stored at the local path `/data1/model/llama3/unsloth/Llama3-8b` on the cnn_dailymail_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 1.2493
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 1e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 2
46
+ - total_train_batch_size: 16
47
+ - total_eval_batch_size: 16
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: cosine
50
+ - lr_scheduler_warmup_steps: 20
51
+ - num_epochs: 10.0
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss |
56
+ |:-------------:|:------:|:----:|:---------------:|
57
+ | 1.2614 | 0.3759 | 200 | 1.3073 |
58
+ | 1.3527 | 0.7519 | 400 | 1.2857 |
59
+ | 1.239 | 1.1278 | 600 | 1.2748 |
60
+ | 1.2647 | 1.5038 | 800 | 1.2676 |
61
+ | 1.307 | 1.8797 | 1000 | 1.2614 |
62
+ | 1.207 | 2.2556 | 1200 | 1.2564 |
63
+ | 1.2654 | 2.6316 | 1400 | 1.2536 |
64
+ | 1.1963 | 3.0075 | 1600 | 1.2510 |
65
+ | 1.164 | 3.3835 | 1800 | 1.2510 |
66
+ | 1.2174 | 3.7594 | 2000 | 1.2493 |
67
+ | 1.1286 | 4.1353 | 2200 | 1.2526 |
68
+ | 1.1467 | 4.5113 | 2400 | 1.2552 |
69
+ | 1.1357 | 4.8872 | 2600 | 1.2556 |
70
+ | 1.0742 | 5.2632 | 2800 | 1.2630 |
71
+
72
+
73
+ ### Framework versions
74
+
75
+ - PEFT 0.10.0
76
+ - Transformers 4.40.0
77
+ - Pytorch 2.2.1
78
+ - Datasets 2.18.0
79
+ - Tokenizers 0.19.1
llama3_8b_peft/cnn_dailymail/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "gate_proj",
24
+ "up_proj",
25
+ "o_proj",
26
+ "q_proj",
27
+ "v_proj",
28
+ "k_proj",
29
+ "down_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/cnn_dailymail/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b94663c816acfdd45a648677188012985898c86ce6add253059af9e6ce680c6
3
+ size 83945296
llama3_8b_peft/cnn_dailymail/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.2631578947368425,
3
+ "eval_loss": 1.2493139505386353,
4
+ "eval_runtime": 92.6006,
5
+ "eval_samples_per_second": 16.199,
6
+ "eval_steps_per_second": 1.015,
7
+ "total_flos": 2.975888186909131e+18,
8
+ "train_loss": 1.220632496220725,
9
+ "train_runtime": 9849.9922,
10
+ "train_samples_per_second": 8.629,
11
+ "train_steps_per_second": 0.54
12
+ }
llama3_8b_peft/cnn_dailymail/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.2631578947368425,
3
+ "eval_loss": 1.2493139505386353,
4
+ "eval_runtime": 92.6006,
5
+ "eval_samples_per_second": 16.199,
6
+ "eval_steps_per_second": 1.015
7
+ }
llama3_8b_peft/cnn_dailymail/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/cnn_dailymail/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/cnn_dailymail/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }
llama3_8b_peft/cnn_dailymail/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.2631578947368425,
3
+ "total_flos": 2.975888186909131e+18,
4
+ "train_loss": 1.220632496220725,
5
+ "train_runtime": 9849.9922,
6
+ "train_samples_per_second": 8.629,
7
+ "train_steps_per_second": 0.54
8
+ }
llama3_8b_peft/cnn_dailymail/trainer_log.jsonl ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 5320, "loss": 1.8478, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-06, "epoch": 0.018796992481203006, "percentage": 0.19, "elapsed_time": "0:00:31", "remaining_time": "4:42:07"}
2
+ {"current_steps": 20, "total_steps": 5320, "loss": 1.8363, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1e-05, "epoch": 0.03759398496240601, "percentage": 0.38, "elapsed_time": "0:01:01", "remaining_time": "4:32:46"}
3
+ {"current_steps": 30, "total_steps": 5320, "loss": 1.7987, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.999912161129377e-06, "epoch": 0.05639097744360902, "percentage": 0.56, "elapsed_time": "0:01:32", "remaining_time": "4:31:25"}
4
+ {"current_steps": 40, "total_steps": 5320, "loss": 1.6542, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.999648647603774e-06, "epoch": 0.07518796992481203, "percentage": 0.75, "elapsed_time": "0:02:02", "remaining_time": "4:28:24"}
5
+ {"current_steps": 50, "total_steps": 5320, "loss": 1.589, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.999209468681885e-06, "epoch": 0.09398496240601503, "percentage": 0.94, "elapsed_time": "0:02:32", "remaining_time": "4:28:26"}
6
+ {"current_steps": 60, "total_steps": 5320, "loss": 1.4517, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.998594639794502e-06, "epoch": 0.11278195488721804, "percentage": 1.13, "elapsed_time": "0:03:02", "remaining_time": "4:26:54"}
7
+ {"current_steps": 70, "total_steps": 5320, "loss": 1.368, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.997804182543973e-06, "epoch": 0.13157894736842105, "percentage": 1.32, "elapsed_time": "0:03:34", "remaining_time": "4:27:31"}
8
+ {"current_steps": 80, "total_steps": 5320, "loss": 1.3354, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.996838124703448e-06, "epoch": 0.15037593984962405, "percentage": 1.5, "elapsed_time": "0:04:05", "remaining_time": "4:27:44"}
9
+ {"current_steps": 90, "total_steps": 5320, "loss": 1.3313, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.995696500215899e-06, "epoch": 0.16917293233082706, "percentage": 1.69, "elapsed_time": "0:04:33", "remaining_time": "4:25:03"}
10
+ {"current_steps": 100, "total_steps": 5320, "loss": 1.3679, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.994379349192927e-06, "epoch": 0.18796992481203006, "percentage": 1.88, "elapsed_time": "0:05:07", "remaining_time": "4:27:25"}
11
+ {"current_steps": 110, "total_steps": 5320, "loss": 1.354, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.992886717913358e-06, "epoch": 0.20676691729323307, "percentage": 2.07, "elapsed_time": "0:05:39", "remaining_time": "4:27:48"}
12
+ {"current_steps": 120, "total_steps": 5320, "loss": 1.3123, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.991218658821609e-06, "epoch": 0.22556390977443608, "percentage": 2.26, "elapsed_time": "0:06:11", "remaining_time": "4:28:02"}
13
+ {"current_steps": 130, "total_steps": 5320, "loss": 1.3205, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.989375230525849e-06, "epoch": 0.24436090225563908, "percentage": 2.44, "elapsed_time": "0:06:38", "remaining_time": "4:25:24"}
14
+ {"current_steps": 140, "total_steps": 5320, "loss": 1.3522, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.987356497795944e-06, "epoch": 0.2631578947368421, "percentage": 2.63, "elapsed_time": "0:07:12", "remaining_time": "4:26:27"}
15
+ {"current_steps": 150, "total_steps": 5320, "loss": 1.2833, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.985162531561174e-06, "epoch": 0.2819548872180451, "percentage": 2.82, "elapsed_time": "0:07:41", "remaining_time": "4:25:01"}
16
+ {"current_steps": 160, "total_steps": 5320, "loss": 1.328, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.982793408907747e-06, "epoch": 0.3007518796992481, "percentage": 3.01, "elapsed_time": "0:08:10", "remaining_time": "4:23:27"}
17
+ {"current_steps": 170, "total_steps": 5320, "loss": 1.3121, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.980249213076085e-06, "epoch": 0.31954887218045114, "percentage": 3.2, "elapsed_time": "0:08:39", "remaining_time": "4:22:30"}
18
+ {"current_steps": 180, "total_steps": 5320, "loss": 1.3388, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.977530033457906e-06, "epoch": 0.3383458646616541, "percentage": 3.38, "elapsed_time": "0:09:08", "remaining_time": "4:21:05"}
19
+ {"current_steps": 190, "total_steps": 5320, "loss": 1.3377, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.97463596559307e-06, "epoch": 0.35714285714285715, "percentage": 3.57, "elapsed_time": "0:09:41", "remaining_time": "4:21:42"}
20
+ {"current_steps": 200, "total_steps": 5320, "loss": 1.2614, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.971567111166246e-06, "epoch": 0.37593984962406013, "percentage": 3.76, "elapsed_time": "0:10:10", "remaining_time": "4:20:41"}
21
+ {"current_steps": 200, "total_steps": 5320, "loss": null, "eval_loss": 1.3072842359542847, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.37593984962406013, "percentage": 3.76, "elapsed_time": "0:10:10", "remaining_time": "4:20:41"}
22
+ {"current_steps": 210, "total_steps": 5320, "loss": 1.3147, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.968323578003312e-06, "epoch": 0.39473684210526316, "percentage": 3.95, "elapsed_time": "0:12:15", "remaining_time": "4:58:26"}
23
+ {"current_steps": 220, "total_steps": 5320, "loss": 1.2929, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.964905480067585e-06, "epoch": 0.41353383458646614, "percentage": 4.14, "elapsed_time": "0:12:48", "remaining_time": "4:56:56"}
24
+ {"current_steps": 230, "total_steps": 5320, "loss": 1.3393, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.961312937455812e-06, "epoch": 0.4323308270676692, "percentage": 4.32, "elapsed_time": "0:13:18", "remaining_time": "4:54:25"}
25
+ {"current_steps": 240, "total_steps": 5320, "loss": 1.3135, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.957546076393944e-06, "epoch": 0.45112781954887216, "percentage": 4.51, "elapsed_time": "0:13:48", "remaining_time": "4:52:26"}
26
+ {"current_steps": 250, "total_steps": 5320, "loss": 1.2597, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.95360502923271e-06, "epoch": 0.4699248120300752, "percentage": 4.7, "elapsed_time": "0:14:19", "remaining_time": "4:50:39"}
27
+ {"current_steps": 260, "total_steps": 5320, "loss": 1.2992, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.949489934442966e-06, "epoch": 0.48872180451127817, "percentage": 4.89, "elapsed_time": "0:14:48", "remaining_time": "4:48:04"}
28
+ {"current_steps": 270, "total_steps": 5320, "loss": 1.2996, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.945200936610821e-06, "epoch": 0.5075187969924813, "percentage": 5.08, "elapsed_time": "0:15:16", "remaining_time": "4:45:39"}
29
+ {"current_steps": 280, "total_steps": 5320, "loss": 1.3132, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.940738186432565e-06, "epoch": 0.5263157894736842, "percentage": 5.26, "elapsed_time": "0:15:46", "remaining_time": "4:43:55"}
30
+ {"current_steps": 290, "total_steps": 5320, "loss": 1.3046, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.936101840709373e-06, "epoch": 0.5451127819548872, "percentage": 5.45, "elapsed_time": "0:16:15", "remaining_time": "4:42:07"}
31
+ {"current_steps": 300, "total_steps": 5320, "loss": 1.2913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.931292062341793e-06, "epoch": 0.5639097744360902, "percentage": 5.64, "elapsed_time": "0:16:47", "remaining_time": "4:40:51"}
32
+ {"current_steps": 310, "total_steps": 5320, "loss": 1.3397, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.926309020324025e-06, "epoch": 0.5827067669172933, "percentage": 5.83, "elapsed_time": "0:17:15", "remaining_time": "4:38:55"}
33
+ {"current_steps": 320, "total_steps": 5320, "loss": 1.2682, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.921152889737985e-06, "epoch": 0.6015037593984962, "percentage": 6.02, "elapsed_time": "0:17:46", "remaining_time": "4:37:42"}
34
+ {"current_steps": 330, "total_steps": 5320, "loss": 1.3243, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.915823851747143e-06, "epoch": 0.6203007518796992, "percentage": 6.2, "elapsed_time": "0:18:16", "remaining_time": "4:36:25"}
35
+ {"current_steps": 340, "total_steps": 5320, "loss": 1.2911, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.910322093590177e-06, "epoch": 0.6390977443609023, "percentage": 6.39, "elapsed_time": "0:18:47", "remaining_time": "4:35:21"}
36
+ {"current_steps": 350, "total_steps": 5320, "loss": 1.2426, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.90464780857437e-06, "epoch": 0.6578947368421053, "percentage": 6.58, "elapsed_time": "0:19:15", "remaining_time": "4:33:22"}
37
+ {"current_steps": 360, "total_steps": 5320, "loss": 1.2459, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.898801196068839e-06, "epoch": 0.6766917293233082, "percentage": 6.77, "elapsed_time": "0:19:46", "remaining_time": "4:32:30"}
38
+ {"current_steps": 370, "total_steps": 5320, "loss": 1.3258, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.892782461497521e-06, "epoch": 0.6954887218045113, "percentage": 6.95, "elapsed_time": "0:20:20", "remaining_time": "4:32:04"}
39
+ {"current_steps": 380, "total_steps": 5320, "loss": 1.2751, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.886591816331953e-06, "epoch": 0.7142857142857143, "percentage": 7.14, "elapsed_time": "0:20:47", "remaining_time": "4:30:21"}
40
+ {"current_steps": 390, "total_steps": 5320, "loss": 1.2523, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.880229478083849e-06, "epoch": 0.7330827067669173, "percentage": 7.33, "elapsed_time": "0:21:19", "remaining_time": "4:29:34"}
41
+ {"current_steps": 400, "total_steps": 5320, "loss": 1.3527, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.87369567029745e-06, "epoch": 0.7518796992481203, "percentage": 7.52, "elapsed_time": "0:21:51", "remaining_time": "4:28:54"}
42
+ {"current_steps": 400, "total_steps": 5320, "loss": null, "eval_loss": 1.28571617603302, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.7518796992481203, "percentage": 7.52, "elapsed_time": "0:21:51", "remaining_time": "4:28:54"}
43
+ {"current_steps": 410, "total_steps": 5320, "loss": 1.2655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.866990622541677e-06, "epoch": 0.7706766917293233, "percentage": 7.71, "elapsed_time": "0:23:54", "remaining_time": "4:46:15"}
44
+ {"current_steps": 420, "total_steps": 5320, "loss": 1.2699, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.860114570402055e-06, "epoch": 0.7894736842105263, "percentage": 7.89, "elapsed_time": "0:24:25", "remaining_time": "4:45:01"}
45
+ {"current_steps": 430, "total_steps": 5320, "loss": 1.3128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.853067755472447e-06, "epoch": 0.8082706766917294, "percentage": 8.08, "elapsed_time": "0:24:54", "remaining_time": "4:43:14"}
46
+ {"current_steps": 440, "total_steps": 5320, "loss": 1.2984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.845850425346563e-06, "epoch": 0.8270676691729323, "percentage": 8.27, "elapsed_time": "0:25:24", "remaining_time": "4:41:52"}
47
+ {"current_steps": 450, "total_steps": 5320, "loss": 1.3103, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.838462833609249e-06, "epoch": 0.8458646616541353, "percentage": 8.46, "elapsed_time": "0:25:54", "remaining_time": "4:40:26"}
48
+ {"current_steps": 460, "total_steps": 5320, "loss": 1.2631, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.830905239827592e-06, "epoch": 0.8646616541353384, "percentage": 8.65, "elapsed_time": "0:26:24", "remaining_time": "4:39:02"}
49
+ {"current_steps": 470, "total_steps": 5320, "loss": 1.2547, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.823177909541795e-06, "epoch": 0.8834586466165414, "percentage": 8.83, "elapsed_time": "0:26:56", "remaining_time": "4:37:57"}
50
+ {"current_steps": 480, "total_steps": 5320, "loss": 1.2982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.815281114255841e-06, "epoch": 0.9022556390977443, "percentage": 9.02, "elapsed_time": "0:27:28", "remaining_time": "4:36:58"}
51
+ {"current_steps": 490, "total_steps": 5320, "loss": 1.2738, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.807215131427966e-06, "epoch": 0.9210526315789473, "percentage": 9.21, "elapsed_time": "0:27:54", "remaining_time": "4:35:06"}
52
+ {"current_steps": 500, "total_steps": 5320, "loss": 1.2883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.798980244460892e-06, "epoch": 0.9398496240601504, "percentage": 9.4, "elapsed_time": "0:28:25", "remaining_time": "4:33:57"}
53
+ {"current_steps": 510, "total_steps": 5320, "loss": 1.2624, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.790576742691895e-06, "epoch": 0.9586466165413534, "percentage": 9.59, "elapsed_time": "0:28:56", "remaining_time": "4:33:00"}
54
+ {"current_steps": 520, "total_steps": 5320, "loss": 1.2346, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.782004921382612e-06, "epoch": 0.9774436090225563, "percentage": 9.77, "elapsed_time": "0:29:27", "remaining_time": "4:31:52"}
55
+ {"current_steps": 530, "total_steps": 5320, "loss": 1.2918, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.773265081708687e-06, "epoch": 0.9962406015037594, "percentage": 9.96, "elapsed_time": "0:29:57", "remaining_time": "4:30:49"}
56
+ {"current_steps": 540, "total_steps": 5320, "loss": 1.2432, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.764357530749178e-06, "epoch": 1.0150375939849625, "percentage": 10.15, "elapsed_time": "0:30:28", "remaining_time": "4:29:41"}
57
+ {"current_steps": 550, "total_steps": 5320, "loss": 1.3564, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.755282581475769e-06, "epoch": 1.0338345864661653, "percentage": 10.34, "elapsed_time": "0:31:00", "remaining_time": "4:28:54"}
58
+ {"current_steps": 560, "total_steps": 5320, "loss": 1.2317, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.74604055274178e-06, "epoch": 1.0526315789473684, "percentage": 10.53, "elapsed_time": "0:31:32", "remaining_time": "4:28:02"}
59
+ {"current_steps": 570, "total_steps": 5320, "loss": 1.2586, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.736631769270958e-06, "epoch": 1.0714285714285714, "percentage": 10.71, "elapsed_time": "0:31:59", "remaining_time": "4:26:33"}
60
+ {"current_steps": 580, "total_steps": 5320, "loss": 1.2914, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.727056561646067e-06, "epoch": 1.0902255639097744, "percentage": 10.9, "elapsed_time": "0:32:25", "remaining_time": "4:24:58"}
61
+ {"current_steps": 590, "total_steps": 5320, "loss": 1.1981, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.717315266297277e-06, "epoch": 1.1090225563909775, "percentage": 11.09, "elapsed_time": "0:32:54", "remaining_time": "4:23:45"}
62
+ {"current_steps": 600, "total_steps": 5320, "loss": 1.239, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.707408225490343e-06, "epoch": 1.1278195488721805, "percentage": 11.28, "elapsed_time": "0:33:26", "remaining_time": "4:23:07"}
63
+ {"current_steps": 600, "total_steps": 5320, "loss": null, "eval_loss": 1.274794340133667, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.1278195488721805, "percentage": 11.28, "elapsed_time": "0:33:26", "remaining_time": "4:23:07"}
64
+ {"current_steps": 610, "total_steps": 5320, "loss": 1.3017, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.697335787314573e-06, "epoch": 1.1466165413533835, "percentage": 11.47, "elapsed_time": "0:35:31", "remaining_time": "4:34:18"}
65
+ {"current_steps": 620, "total_steps": 5320, "loss": 1.2571, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.687098305670606e-06, "epoch": 1.1654135338345863, "percentage": 11.65, "elapsed_time": "0:36:04", "remaining_time": "4:33:24"}
66
+ {"current_steps": 630, "total_steps": 5320, "loss": 1.3266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.676696140257969e-06, "epoch": 1.1842105263157894, "percentage": 11.84, "elapsed_time": "0:36:36", "remaining_time": "4:32:31"}
67
+ {"current_steps": 640, "total_steps": 5320, "loss": 1.2692, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.66612965656245e-06, "epoch": 1.2030075187969924, "percentage": 12.03, "elapsed_time": "0:37:07", "remaining_time": "4:31:27"}
68
+ {"current_steps": 650, "total_steps": 5320, "loss": 1.2982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.655399225843244e-06, "epoch": 1.2218045112781954, "percentage": 12.22, "elapsed_time": "0:37:36", "remaining_time": "4:30:14"}
69
+ {"current_steps": 660, "total_steps": 5320, "loss": 1.2372, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.644505225119922e-06, "epoch": 1.2406015037593985, "percentage": 12.41, "elapsed_time": "0:38:08", "remaining_time": "4:29:17"}
70
+ {"current_steps": 670, "total_steps": 5320, "loss": 1.2541, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.633448037159167e-06, "epoch": 1.2593984962406015, "percentage": 12.59, "elapsed_time": "0:38:36", "remaining_time": "4:27:55"}
71
+ {"current_steps": 680, "total_steps": 5320, "loss": 1.2387, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.622228050461345e-06, "epoch": 1.2781954887218046, "percentage": 12.78, "elapsed_time": "0:39:07", "remaining_time": "4:26:57"}
72
+ {"current_steps": 690, "total_steps": 5320, "loss": 1.2468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.610845659246833e-06, "epoch": 1.2969924812030076, "percentage": 12.97, "elapsed_time": "0:39:38", "remaining_time": "4:25:57"}
73
+ {"current_steps": 700, "total_steps": 5320, "loss": 1.2913, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.599301263442194e-06, "epoch": 1.3157894736842106, "percentage": 13.16, "elapsed_time": "0:40:08", "remaining_time": "4:24:55"}
74
+ {"current_steps": 710, "total_steps": 5320, "loss": 1.2588, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.587595268666099e-06, "epoch": 1.3345864661654137, "percentage": 13.35, "elapsed_time": "0:40:40", "remaining_time": "4:24:05"}
75
+ {"current_steps": 720, "total_steps": 5320, "loss": 1.2759, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.575728086215093e-06, "epoch": 1.3533834586466165, "percentage": 13.53, "elapsed_time": "0:41:10", "remaining_time": "4:23:04"}
76
+ {"current_steps": 730, "total_steps": 5320, "loss": 1.2981, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.56370013304914e-06, "epoch": 1.3721804511278195, "percentage": 13.72, "elapsed_time": "0:41:41", "remaining_time": "4:22:07"}
77
+ {"current_steps": 740, "total_steps": 5320, "loss": 1.2511, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.551511831776966e-06, "epoch": 1.3909774436090225, "percentage": 13.91, "elapsed_time": "0:42:10", "remaining_time": "4:21:00"}
78
+ {"current_steps": 750, "total_steps": 5320, "loss": 1.2008, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.53916361064122e-06, "epoch": 1.4097744360902256, "percentage": 14.1, "elapsed_time": "0:42:41", "remaining_time": "4:20:07"}
79
+ {"current_steps": 760, "total_steps": 5320, "loss": 1.2803, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.526655903503423e-06, "epoch": 1.4285714285714286, "percentage": 14.29, "elapsed_time": "0:43:11", "remaining_time": "4:19:08"}
80
+ {"current_steps": 770, "total_steps": 5320, "loss": 1.2487, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.513989149828718e-06, "epoch": 1.4473684210526316, "percentage": 14.47, "elapsed_time": "0:43:42", "remaining_time": "4:18:15"}
81
+ {"current_steps": 780, "total_steps": 5320, "loss": 1.1717, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.501163794670445e-06, "epoch": 1.4661654135338344, "percentage": 14.66, "elapsed_time": "0:44:08", "remaining_time": "4:16:54"}
82
+ {"current_steps": 790, "total_steps": 5320, "loss": 1.2217, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.488180288654485e-06, "epoch": 1.4849624060150375, "percentage": 14.85, "elapsed_time": "0:44:39", "remaining_time": "4:16:05"}
83
+ {"current_steps": 800, "total_steps": 5320, "loss": 1.2647, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.475039087963443e-06, "epoch": 1.5037593984962405, "percentage": 15.04, "elapsed_time": "0:45:10", "remaining_time": "4:15:14"}
84
+ {"current_steps": 800, "total_steps": 5320, "loss": null, "eval_loss": 1.2675856351852417, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.5037593984962405, "percentage": 15.04, "elapsed_time": "0:45:10", "remaining_time": "4:15:14"}
85
+ {"current_steps": 810, "total_steps": 5320, "loss": 1.2283, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.461740654320608e-06, "epoch": 1.5225563909774436, "percentage": 15.23, "elapsed_time": "0:47:15", "remaining_time": "4:23:05"}
86
+ {"current_steps": 820, "total_steps": 5320, "loss": 1.2758, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.448285454973739e-06, "epoch": 1.5413533834586466, "percentage": 15.41, "elapsed_time": "0:47:44", "remaining_time": "4:22:00"}
87
+ {"current_steps": 830, "total_steps": 5320, "loss": 1.2634, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.434673962678638e-06, "epoch": 1.5601503759398496, "percentage": 15.6, "elapsed_time": "0:48:12", "remaining_time": "4:20:46"}
88
+ {"current_steps": 840, "total_steps": 5320, "loss": 1.2268, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.420906655682553e-06, "epoch": 1.5789473684210527, "percentage": 15.79, "elapsed_time": "0:48:42", "remaining_time": "4:19:46"}
89
+ {"current_steps": 850, "total_steps": 5320, "loss": 1.3011, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.40698401770736e-06, "epoch": 1.5977443609022557, "percentage": 15.98, "elapsed_time": "0:49:12", "remaining_time": "4:18:49"}
90
+ {"current_steps": 860, "total_steps": 5320, "loss": 1.2172, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.392906537932582e-06, "epoch": 1.6165413533834587, "percentage": 16.17, "elapsed_time": "0:49:47", "remaining_time": "4:18:14"}
91
+ {"current_steps": 870, "total_steps": 5320, "loss": 1.2692, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.378674710978185e-06, "epoch": 1.6353383458646618, "percentage": 16.35, "elapsed_time": "0:50:17", "remaining_time": "4:17:12"}
92
+ {"current_steps": 880, "total_steps": 5320, "loss": 1.2359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.364289036887214e-06, "epoch": 1.6541353383458648, "percentage": 16.54, "elapsed_time": "0:50:46", "remaining_time": "4:16:10"}
93
+ {"current_steps": 890, "total_steps": 5320, "loss": 1.2031, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.349750021108212e-06, "epoch": 1.6729323308270678, "percentage": 16.73, "elapsed_time": "0:51:15", "remaining_time": "4:15:09"}
94
+ {"current_steps": 900, "total_steps": 5320, "loss": 1.2768, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.335058174477472e-06, "epoch": 1.6917293233082706, "percentage": 16.92, "elapsed_time": "0:51:47", "remaining_time": "4:14:23"}
95
+ {"current_steps": 910, "total_steps": 5320, "loss": 1.1899, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.320214013201079e-06, "epoch": 1.7105263157894737, "percentage": 17.11, "elapsed_time": "0:52:14", "remaining_time": "4:13:12"}
96
+ {"current_steps": 920, "total_steps": 5320, "loss": 1.2266, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.305218058836778e-06, "epoch": 1.7293233082706767, "percentage": 17.29, "elapsed_time": "0:52:42", "remaining_time": "4:12:04"}
97
+ {"current_steps": 930, "total_steps": 5320, "loss": 1.2872, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.290070838275649e-06, "epoch": 1.7481203007518797, "percentage": 17.48, "elapsed_time": "0:53:14", "remaining_time": "4:11:19"}
98
+ {"current_steps": 940, "total_steps": 5320, "loss": 1.2469, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.274772883723587e-06, "epoch": 1.7669172932330826, "percentage": 17.67, "elapsed_time": "0:53:45", "remaining_time": "4:10:27"}
99
+ {"current_steps": 950, "total_steps": 5320, "loss": 1.2872, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.259324732682615e-06, "epoch": 1.7857142857142856, "percentage": 17.86, "elapsed_time": "0:54:16", "remaining_time": "4:09:41"}
100
+ {"current_steps": 960, "total_steps": 5320, "loss": 1.2179, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.24372692793199e-06, "epoch": 1.8045112781954886, "percentage": 18.05, "elapsed_time": "0:54:43", "remaining_time": "4:08:34"}
101
+ {"current_steps": 970, "total_steps": 5320, "loss": 1.2497, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.22798001750913e-06, "epoch": 1.8233082706766917, "percentage": 18.23, "elapsed_time": "0:55:12", "remaining_time": "4:07:34"}
102
+ {"current_steps": 980, "total_steps": 5320, "loss": 1.2534, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.21208455469037e-06, "epoch": 1.8421052631578947, "percentage": 18.42, "elapsed_time": "0:55:41", "remaining_time": "4:06:39"}
103
+ {"current_steps": 990, "total_steps": 5320, "loss": 1.3171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.196041097971509e-06, "epoch": 1.8609022556390977, "percentage": 18.61, "elapsed_time": "0:56:13", "remaining_time": "4:05:53"}
104
+ {"current_steps": 1000, "total_steps": 5320, "loss": 1.307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.179850211048193e-06, "epoch": 1.8796992481203008, "percentage": 18.8, "elapsed_time": "0:56:43", "remaining_time": "4:05:04"}
105
+ {"current_steps": 1000, "total_steps": 5320, "loss": null, "eval_loss": 1.2614006996154785, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.8796992481203008, "percentage": 18.8, "elapsed_time": "0:56:43", "remaining_time": "4:05:04"}
106
+ {"current_steps": 1010, "total_steps": 5320, "loss": 1.2613, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.163512462796113e-06, "epoch": 1.8984962406015038, "percentage": 18.98, "elapsed_time": "0:58:48", "remaining_time": "4:10:56"}
107
+ {"current_steps": 1020, "total_steps": 5320, "loss": 1.242, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.14702842725101e-06, "epoch": 1.9172932330827068, "percentage": 19.17, "elapsed_time": "0:59:15", "remaining_time": "4:09:50"}
108
+ {"current_steps": 1030, "total_steps": 5320, "loss": 1.268, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.13039868358851e-06, "epoch": 1.9360902255639099, "percentage": 19.36, "elapsed_time": "0:59:46", "remaining_time": "4:08:57"}
109
+ {"current_steps": 1040, "total_steps": 5320, "loss": 1.2116, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.113623816103775e-06, "epoch": 1.954887218045113, "percentage": 19.55, "elapsed_time": "1:00:16", "remaining_time": "4:08:04"}
110
+ {"current_steps": 1050, "total_steps": 5320, "loss": 1.2582, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.09670441419097e-06, "epoch": 1.973684210526316, "percentage": 19.74, "elapsed_time": "1:00:49", "remaining_time": "4:07:22"}
111
+ {"current_steps": 1060, "total_steps": 5320, "loss": 1.2643, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.079641072322555e-06, "epoch": 1.9924812030075187, "percentage": 19.92, "elapsed_time": "1:01:20", "remaining_time": "4:06:30"}
112
+ {"current_steps": 1070, "total_steps": 5320, "loss": 1.2027, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.062434390028407e-06, "epoch": 2.011278195488722, "percentage": 20.11, "elapsed_time": "1:01:55", "remaining_time": "4:05:57"}
113
+ {"current_steps": 1080, "total_steps": 5320, "loss": 1.2506, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.045084971874738e-06, "epoch": 2.030075187969925, "percentage": 20.3, "elapsed_time": "1:02:27", "remaining_time": "4:05:12"}
114
+ {"current_steps": 1090, "total_steps": 5320, "loss": 1.2196, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.027593427442867e-06, "epoch": 2.0488721804511276, "percentage": 20.49, "elapsed_time": "1:02:57", "remaining_time": "4:04:21"}
115
+ {"current_steps": 1100, "total_steps": 5320, "loss": 1.261, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.009960371307798e-06, "epoch": 2.0676691729323307, "percentage": 20.68, "elapsed_time": "1:03:27", "remaining_time": "4:03:27"}
116
+ {"current_steps": 1110, "total_steps": 5320, "loss": 1.2229, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.992186423016626e-06, "epoch": 2.0864661654135337, "percentage": 20.86, "elapsed_time": "1:03:58", "remaining_time": "4:02:39"}
117
+ {"current_steps": 1120, "total_steps": 5320, "loss": 1.1843, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.974272207066767e-06, "epoch": 2.1052631578947367, "percentage": 21.05, "elapsed_time": "1:04:31", "remaining_time": "4:01:57"}
118
+ {"current_steps": 1130, "total_steps": 5320, "loss": 1.2195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.956218352884022e-06, "epoch": 2.1240601503759398, "percentage": 21.24, "elapsed_time": "1:05:03", "remaining_time": "4:01:12"}
119
+ {"current_steps": 1140, "total_steps": 5320, "loss": 1.2789, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.938025494800454e-06, "epoch": 2.142857142857143, "percentage": 21.43, "elapsed_time": "1:05:34", "remaining_time": "4:00:25"}
120
+ {"current_steps": 1150, "total_steps": 5320, "loss": 1.1721, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.919694272032108e-06, "epoch": 2.161654135338346, "percentage": 21.62, "elapsed_time": "1:06:05", "remaining_time": "3:59:37"}
121
+ {"current_steps": 1160, "total_steps": 5320, "loss": 1.2334, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.901225328656543e-06, "epoch": 2.180451127819549, "percentage": 21.8, "elapsed_time": "1:06:38", "remaining_time": "3:58:58"}
122
+ {"current_steps": 1170, "total_steps": 5320, "loss": 1.2439, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.882619313590212e-06, "epoch": 2.199248120300752, "percentage": 21.99, "elapsed_time": "1:07:08", "remaining_time": "3:58:08"}
123
+ {"current_steps": 1180, "total_steps": 5320, "loss": 1.1926, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.863876880565656e-06, "epoch": 2.218045112781955, "percentage": 22.18, "elapsed_time": "1:07:37", "remaining_time": "3:57:16"}
124
+ {"current_steps": 1190, "total_steps": 5320, "loss": 1.191, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.844998688108535e-06, "epoch": 2.236842105263158, "percentage": 22.37, "elapsed_time": "1:08:05", "remaining_time": "3:56:17"}
125
+ {"current_steps": 1200, "total_steps": 5320, "loss": 1.207, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.825985399514488e-06, "epoch": 2.255639097744361, "percentage": 22.56, "elapsed_time": "1:08:34", "remaining_time": "3:55:27"}
126
+ {"current_steps": 1200, "total_steps": 5320, "loss": null, "eval_loss": 1.2564018964767456, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.255639097744361, "percentage": 22.56, "elapsed_time": "1:08:34", "remaining_time": "3:55:27"}
127
+ {"current_steps": 1210, "total_steps": 5320, "loss": 1.2238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.806837682825835e-06, "epoch": 2.274436090225564, "percentage": 22.74, "elapsed_time": "1:10:37", "remaining_time": "3:59:54"}
128
+ {"current_steps": 1220, "total_steps": 5320, "loss": 1.1689, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.787556210808101e-06, "epoch": 2.293233082706767, "percentage": 22.93, "elapsed_time": "1:11:07", "remaining_time": "3:59:02"}
129
+ {"current_steps": 1230, "total_steps": 5320, "loss": 1.253, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.768141660926375e-06, "epoch": 2.31203007518797, "percentage": 23.12, "elapsed_time": "1:11:38", "remaining_time": "3:58:11"}
130
+ {"current_steps": 1240, "total_steps": 5320, "loss": 1.1906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.748594715321512e-06, "epoch": 2.3308270676691727, "percentage": 23.31, "elapsed_time": "1:12:10", "remaining_time": "3:57:28"}
131
+ {"current_steps": 1250, "total_steps": 5320, "loss": 1.1598, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.728916060786162e-06, "epoch": 2.3496240601503757, "percentage": 23.5, "elapsed_time": "1:12:40", "remaining_time": "3:56:36"}
132
+ {"current_steps": 1260, "total_steps": 5320, "loss": 1.2064, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.70910638874064e-06, "epoch": 2.3684210526315788, "percentage": 23.68, "elapsed_time": "1:13:08", "remaining_time": "3:55:41"}
133
+ {"current_steps": 1270, "total_steps": 5320, "loss": 1.2097, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.689166395208638e-06, "epoch": 2.387218045112782, "percentage": 23.87, "elapsed_time": "1:13:39", "remaining_time": "3:54:54"}
134
+ {"current_steps": 1280, "total_steps": 5320, "loss": 1.2016, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.669096780792754e-06, "epoch": 2.406015037593985, "percentage": 24.06, "elapsed_time": "1:14:07", "remaining_time": "3:53:57"}
135
+ {"current_steps": 1290, "total_steps": 5320, "loss": 1.2202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.6488982506499e-06, "epoch": 2.424812030075188, "percentage": 24.25, "elapsed_time": "1:14:35", "remaining_time": "3:53:01"}
136
+ {"current_steps": 1300, "total_steps": 5320, "loss": 1.2591, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.628571514466502e-06, "epoch": 2.443609022556391, "percentage": 24.44, "elapsed_time": "1:15:04", "remaining_time": "3:52:09"}
137
+ {"current_steps": 1310, "total_steps": 5320, "loss": 1.1778, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.608117286433583e-06, "epoch": 2.462406015037594, "percentage": 24.62, "elapsed_time": "1:15:36", "remaining_time": "3:51:27"}
138
+ {"current_steps": 1320, "total_steps": 5320, "loss": 1.1876, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.587536285221656e-06, "epoch": 2.481203007518797, "percentage": 24.81, "elapsed_time": "1:16:06", "remaining_time": "3:50:38"}
139
+ {"current_steps": 1330, "total_steps": 5320, "loss": 1.2413, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.566829233955484e-06, "epoch": 2.5, "percentage": 25.0, "elapsed_time": "1:16:37", "remaining_time": "3:49:53"}
140
+ {"current_steps": 1340, "total_steps": 5320, "loss": 1.2581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.545996860188668e-06, "epoch": 2.518796992481203, "percentage": 25.19, "elapsed_time": "1:17:06", "remaining_time": "3:49:00"}
141
+ {"current_steps": 1350, "total_steps": 5320, "loss": 1.2328, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.525039895878078e-06, "epoch": 2.537593984962406, "percentage": 25.38, "elapsed_time": "1:17:34", "remaining_time": "3:48:08"}
142
+ {"current_steps": 1360, "total_steps": 5320, "loss": 1.2026, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.503959077358143e-06, "epoch": 2.556390977443609, "percentage": 25.56, "elapsed_time": "1:18:07", "remaining_time": "3:47:30"}
143
+ {"current_steps": 1370, "total_steps": 5320, "loss": 1.2398, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.482755145314987e-06, "epoch": 2.575187969924812, "percentage": 25.75, "elapsed_time": "1:18:38", "remaining_time": "3:46:43"}
144
+ {"current_steps": 1380, "total_steps": 5320, "loss": 1.2265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.46142884476038e-06, "epoch": 2.593984962406015, "percentage": 25.94, "elapsed_time": "1:19:07", "remaining_time": "3:45:53"}
145
+ {"current_steps": 1390, "total_steps": 5320, "loss": 1.1805, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.439980925005587e-06, "epoch": 2.612781954887218, "percentage": 26.13, "elapsed_time": "1:19:39", "remaining_time": "3:45:13"}
146
+ {"current_steps": 1400, "total_steps": 5320, "loss": 1.2654, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.418412139635026e-06, "epoch": 2.6315789473684212, "percentage": 26.32, "elapsed_time": "1:20:10", "remaining_time": "3:44:29"}
147
+ {"current_steps": 1400, "total_steps": 5320, "loss": null, "eval_loss": 1.2535929679870605, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.6315789473684212, "percentage": 26.32, "elapsed_time": "1:20:10", "remaining_time": "3:44:29"}
148
+ {"current_steps": 1410, "total_steps": 5320, "loss": 1.2129, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.396723246479798e-06, "epoch": 2.6503759398496243, "percentage": 26.5, "elapsed_time": "1:22:15", "remaining_time": "3:48:06"}
149
+ {"current_steps": 1420, "total_steps": 5320, "loss": 1.1882, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.374915007591053e-06, "epoch": 2.6691729323308273, "percentage": 26.69, "elapsed_time": "1:22:44", "remaining_time": "3:47:15"}
150
+ {"current_steps": 1430, "total_steps": 5320, "loss": 1.2261, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.352988189213223e-06, "epoch": 2.6879699248120303, "percentage": 26.88, "elapsed_time": "1:23:15", "remaining_time": "3:46:28"}
151
+ {"current_steps": 1440, "total_steps": 5320, "loss": 1.2191, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.330943561757092e-06, "epoch": 2.706766917293233, "percentage": 27.07, "elapsed_time": "1:23:48", "remaining_time": "3:45:48"}
152
+ {"current_steps": 1450, "total_steps": 5320, "loss": 1.2234, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.308781899772731e-06, "epoch": 2.725563909774436, "percentage": 27.26, "elapsed_time": "1:24:17", "remaining_time": "3:44:57"}
153
+ {"current_steps": 1460, "total_steps": 5320, "loss": 1.2698, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.286503981922284e-06, "epoch": 2.744360902255639, "percentage": 27.44, "elapsed_time": "1:24:44", "remaining_time": "3:44:03"}
154
+ {"current_steps": 1470, "total_steps": 5320, "loss": 1.2, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.264110590952609e-06, "epoch": 2.763157894736842, "percentage": 27.63, "elapsed_time": "1:25:13", "remaining_time": "3:43:11"}
155
+ {"current_steps": 1480, "total_steps": 5320, "loss": 1.263, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.241602513667775e-06, "epoch": 2.781954887218045, "percentage": 27.82, "elapsed_time": "1:25:43", "remaining_time": "3:42:24"}
156
+ {"current_steps": 1490, "total_steps": 5320, "loss": 1.2454, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.218980540901417e-06, "epoch": 2.800751879699248, "percentage": 28.01, "elapsed_time": "1:26:13", "remaining_time": "3:41:38"}
157
+ {"current_steps": 1500, "total_steps": 5320, "loss": 1.2002, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.19624546748895e-06, "epoch": 2.819548872180451, "percentage": 28.2, "elapsed_time": "1:26:44", "remaining_time": "3:40:54"}
158
+ {"current_steps": 1510, "total_steps": 5320, "loss": 1.22, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.173398092239647e-06, "epoch": 2.838345864661654, "percentage": 28.38, "elapsed_time": "1:27:14", "remaining_time": "3:40:07"}
159
+ {"current_steps": 1520, "total_steps": 5320, "loss": 1.1997, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.150439217908557e-06, "epoch": 2.857142857142857, "percentage": 28.57, "elapsed_time": "1:27:42", "remaining_time": "3:39:17"}
160
+ {"current_steps": 1530, "total_steps": 5320, "loss": 1.2291, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.12736965116832e-06, "epoch": 2.8759398496240602, "percentage": 28.76, "elapsed_time": "1:28:13", "remaining_time": "3:38:32"}
161
+ {"current_steps": 1540, "total_steps": 5320, "loss": 1.2645, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.104190202580811e-06, "epoch": 2.8947368421052633, "percentage": 28.95, "elapsed_time": "1:28:43", "remaining_time": "3:37:47"}
162
+ {"current_steps": 1550, "total_steps": 5320, "loss": 1.212, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.080901686568664e-06, "epoch": 2.9135338345864663, "percentage": 29.14, "elapsed_time": "1:29:15", "remaining_time": "3:37:05"}
163
+ {"current_steps": 1560, "total_steps": 5320, "loss": 1.1731, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.057504921386661e-06, "epoch": 2.932330827067669, "percentage": 29.32, "elapsed_time": "1:29:45", "remaining_time": "3:36:20"}
164
+ {"current_steps": 1570, "total_steps": 5320, "loss": 1.2283, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.034000729092967e-06, "epoch": 2.951127819548872, "percentage": 29.51, "elapsed_time": "1:30:19", "remaining_time": "3:35:44"}
165
+ {"current_steps": 1580, "total_steps": 5320, "loss": 1.2285, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.010389935520269e-06, "epoch": 2.969924812030075, "percentage": 29.7, "elapsed_time": "1:30:48", "remaining_time": "3:34:55"}
166
+ {"current_steps": 1590, "total_steps": 5320, "loss": 1.2275, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.986673370246743e-06, "epoch": 2.988721804511278, "percentage": 29.89, "elapsed_time": "1:31:16", "remaining_time": "3:34:07"}
167
+ {"current_steps": 1600, "total_steps": 5320, "loss": 1.1963, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.962851866566912e-06, "epoch": 3.007518796992481, "percentage": 30.08, "elapsed_time": "1:31:45", "remaining_time": "3:33:20"}
168
+ {"current_steps": 1600, "total_steps": 5320, "loss": null, "eval_loss": 1.2510054111480713, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.007518796992481, "percentage": 30.08, "elapsed_time": "1:31:45", "remaining_time": "3:33:20"}
169
+ {"current_steps": 1610, "total_steps": 5320, "loss": 1.1984, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.938926261462366e-06, "epoch": 3.026315789473684, "percentage": 30.26, "elapsed_time": "1:33:47", "remaining_time": "3:36:07"}
170
+ {"current_steps": 1620, "total_steps": 5320, "loss": 1.183, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.914897395572362e-06, "epoch": 3.045112781954887, "percentage": 30.45, "elapsed_time": "1:34:17", "remaining_time": "3:35:22"}
171
+ {"current_steps": 1630, "total_steps": 5320, "loss": 1.1625, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.890766113164272e-06, "epoch": 3.06390977443609, "percentage": 30.64, "elapsed_time": "1:34:45", "remaining_time": "3:34:31"}
172
+ {"current_steps": 1640, "total_steps": 5320, "loss": 1.2114, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.866533262103937e-06, "epoch": 3.082706766917293, "percentage": 30.83, "elapsed_time": "1:35:19", "remaining_time": "3:33:54"}
173
+ {"current_steps": 1650, "total_steps": 5320, "loss": 1.1771, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.842199693825863e-06, "epoch": 3.101503759398496, "percentage": 31.02, "elapsed_time": "1:35:47", "remaining_time": "3:33:04"}
174
+ {"current_steps": 1660, "total_steps": 5320, "loss": 1.1924, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.817766263303312e-06, "epoch": 3.1203007518796992, "percentage": 31.2, "elapsed_time": "1:36:17", "remaining_time": "3:32:17"}
175
+ {"current_steps": 1670, "total_steps": 5320, "loss": 1.1873, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.793233829018263e-06, "epoch": 3.1390977443609023, "percentage": 31.39, "elapsed_time": "1:36:46", "remaining_time": "3:31:31"}
176
+ {"current_steps": 1680, "total_steps": 5320, "loss": 1.1504, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.768603252931243e-06, "epoch": 3.1578947368421053, "percentage": 31.58, "elapsed_time": "1:37:15", "remaining_time": "3:30:44"}
177
+ {"current_steps": 1690, "total_steps": 5320, "loss": 1.1758, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.743875400451047e-06, "epoch": 3.1766917293233083, "percentage": 31.77, "elapsed_time": "1:37:47", "remaining_time": "3:30:02"}
178
+ {"current_steps": 1700, "total_steps": 5320, "loss": 1.1254, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.719051140404327e-06, "epoch": 3.1954887218045114, "percentage": 31.95, "elapsed_time": "1:38:16", "remaining_time": "3:29:15"}
179
+ {"current_steps": 1710, "total_steps": 5320, "loss": 1.1965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.69413134500507e-06, "epoch": 3.2142857142857144, "percentage": 32.14, "elapsed_time": "1:38:44", "remaining_time": "3:28:27"}
180
+ {"current_steps": 1720, "total_steps": 5320, "loss": 1.1811, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.669116889823955e-06, "epoch": 3.2330827067669174, "percentage": 32.33, "elapsed_time": "1:39:14", "remaining_time": "3:27:42"}
181
+ {"current_steps": 1730, "total_steps": 5320, "loss": 1.1418, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.644008653757571e-06, "epoch": 3.2518796992481205, "percentage": 32.52, "elapsed_time": "1:39:44", "remaining_time": "3:26:57"}
182
+ {"current_steps": 1740, "total_steps": 5320, "loss": 1.1522, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.6188075189975644e-06, "epoch": 3.2706766917293235, "percentage": 32.71, "elapsed_time": "1:40:15", "remaining_time": "3:26:17"}
183
+ {"current_steps": 1750, "total_steps": 5320, "loss": 1.1975, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.593514370999617e-06, "epoch": 3.2894736842105265, "percentage": 32.89, "elapsed_time": "1:40:47", "remaining_time": "3:25:37"}
184
+ {"current_steps": 1760, "total_steps": 5320, "loss": 1.198, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.568130098452352e-06, "epoch": 3.308270676691729, "percentage": 33.08, "elapsed_time": "1:41:17", "remaining_time": "3:24:53"}
185
+ {"current_steps": 1770, "total_steps": 5320, "loss": 1.1596, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.542655593246103e-06, "epoch": 3.327067669172932, "percentage": 33.27, "elapsed_time": "1:41:47", "remaining_time": "3:24:09"}
186
+ {"current_steps": 1780, "total_steps": 5320, "loss": 1.2303, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.517091750441576e-06, "epoch": 3.345864661654135, "percentage": 33.46, "elapsed_time": "1:42:16", "remaining_time": "3:23:24"}
187
+ {"current_steps": 1790, "total_steps": 5320, "loss": 1.1841, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.491439468238404e-06, "epoch": 3.3646616541353382, "percentage": 33.65, "elapsed_time": "1:42:44", "remaining_time": "3:22:37"}
188
+ {"current_steps": 1800, "total_steps": 5320, "loss": 1.164, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.465699647943586e-06, "epoch": 3.3834586466165413, "percentage": 33.83, "elapsed_time": "1:43:20", "remaining_time": "3:22:04"}
189
+ {"current_steps": 1800, "total_steps": 5320, "loss": null, "eval_loss": 1.2509571313858032, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.3834586466165413, "percentage": 33.83, "elapsed_time": "1:43:20", "remaining_time": "3:22:04"}
190
+ {"current_steps": 1810, "total_steps": 5320, "loss": 1.1798, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.43987319393982e-06, "epoch": 3.4022556390977443, "percentage": 34.02, "elapsed_time": "1:45:24", "remaining_time": "3:24:25"}
191
+ {"current_steps": 1820, "total_steps": 5320, "loss": 1.1729, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.413961013653725e-06, "epoch": 3.4210526315789473, "percentage": 34.21, "elapsed_time": "1:45:56", "remaining_time": "3:23:44"}
192
+ {"current_steps": 1830, "total_steps": 5320, "loss": 1.2036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.387964017523964e-06, "epoch": 3.4398496240601504, "percentage": 34.4, "elapsed_time": "1:46:25", "remaining_time": "3:22:57"}
193
+ {"current_steps": 1840, "total_steps": 5320, "loss": 1.2009, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.361883118969248e-06, "epoch": 3.4586466165413534, "percentage": 34.59, "elapsed_time": "1:46:57", "remaining_time": "3:22:18"}
194
+ {"current_steps": 1850, "total_steps": 5320, "loss": 1.1395, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.335719234356245e-06, "epoch": 3.4774436090225564, "percentage": 34.77, "elapsed_time": "1:47:26", "remaining_time": "3:21:31"}
195
+ {"current_steps": 1860, "total_steps": 5320, "loss": 1.1893, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.309473282967387e-06, "epoch": 3.4962406015037595, "percentage": 34.96, "elapsed_time": "1:47:55", "remaining_time": "3:20:46"}
196
+ {"current_steps": 1870, "total_steps": 5320, "loss": 1.1934, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.283146186968566e-06, "epoch": 3.5150375939849625, "percentage": 35.15, "elapsed_time": "1:48:23", "remaining_time": "3:19:59"}
197
+ {"current_steps": 1880, "total_steps": 5320, "loss": 1.1819, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.256738871376733e-06, "epoch": 3.5338345864661656, "percentage": 35.34, "elapsed_time": "1:48:53", "remaining_time": "3:19:15"}
198
+ {"current_steps": 1890, "total_steps": 5320, "loss": 1.2371, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.230252264027398e-06, "epoch": 3.5526315789473686, "percentage": 35.53, "elapsed_time": "1:49:23", "remaining_time": "3:18:31"}
199
+ {"current_steps": 1900, "total_steps": 5320, "loss": 1.227, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.203687295542032e-06, "epoch": 3.571428571428571, "percentage": 35.71, "elapsed_time": "1:49:55", "remaining_time": "3:17:51"}
200
+ {"current_steps": 1910, "total_steps": 5320, "loss": 1.1845, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.1770448992953676e-06, "epoch": 3.590225563909774, "percentage": 35.9, "elapsed_time": "1:50:26", "remaining_time": "3:17:09"}
201
+ {"current_steps": 1920, "total_steps": 5320, "loss": 1.1571, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.1503260113826035e-06, "epoch": 3.6090225563909772, "percentage": 36.09, "elapsed_time": "1:50:56", "remaining_time": "3:16:27"}
202
+ {"current_steps": 1930, "total_steps": 5320, "loss": 1.2118, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.123531570586515e-06, "epoch": 3.6278195488721803, "percentage": 36.28, "elapsed_time": "1:51:29", "remaining_time": "3:15:49"}
203
+ {"current_steps": 1940, "total_steps": 5320, "loss": 1.2038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.09666251834447e-06, "epoch": 3.6466165413533833, "percentage": 36.47, "elapsed_time": "1:51:58", "remaining_time": "3:15:05"}
204
+ {"current_steps": 1950, "total_steps": 5320, "loss": 1.1971, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.069719798715347e-06, "epoch": 3.6654135338345863, "percentage": 36.65, "elapsed_time": "1:52:29", "remaining_time": "3:14:24"}
205
+ {"current_steps": 1960, "total_steps": 5320, "loss": 1.1426, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.042704358346375e-06, "epoch": 3.6842105263157894, "percentage": 36.84, "elapsed_time": "1:53:01", "remaining_time": "3:13:46"}
206
+ {"current_steps": 1970, "total_steps": 5320, "loss": 1.1678, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.015617146439863e-06, "epoch": 3.7030075187969924, "percentage": 37.03, "elapsed_time": "1:53:28", "remaining_time": "3:12:58"}
207
+ {"current_steps": 1980, "total_steps": 5320, "loss": 1.1454, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.988459114719849e-06, "epoch": 3.7218045112781954, "percentage": 37.22, "elapsed_time": "1:53:57", "remaining_time": "3:12:14"}
208
+ {"current_steps": 1990, "total_steps": 5320, "loss": 1.2414, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.9612312173986675e-06, "epoch": 3.7406015037593985, "percentage": 37.41, "elapsed_time": "1:54:30", "remaining_time": "3:11:36"}
209
+ {"current_steps": 2000, "total_steps": 5320, "loss": 1.2174, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.933934411143419e-06, "epoch": 3.7593984962406015, "percentage": 37.59, "elapsed_time": "1:54:58", "remaining_time": "3:10:51"}
210
+ {"current_steps": 2000, "total_steps": 5320, "loss": null, "eval_loss": 1.2493139505386353, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.7593984962406015, "percentage": 37.59, "elapsed_time": "1:54:58", "remaining_time": "3:10:51"}
211
+ {"current_steps": 2010, "total_steps": 5320, "loss": 1.1772, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.906569655042357e-06, "epoch": 3.7781954887218046, "percentage": 37.78, "elapsed_time": "1:57:03", "remaining_time": "3:12:46"}
212
+ {"current_steps": 2020, "total_steps": 5320, "loss": 1.1805, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.879137910571191e-06, "epoch": 3.7969924812030076, "percentage": 37.97, "elapsed_time": "1:57:35", "remaining_time": "3:12:05"}
213
+ {"current_steps": 2030, "total_steps": 5320, "loss": 1.2065, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.8516401415593005e-06, "epoch": 3.8157894736842106, "percentage": 38.16, "elapsed_time": "1:58:10", "remaining_time": "3:11:31"}
214
+ {"current_steps": 2040, "total_steps": 5320, "loss": 1.157, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.824077314155877e-06, "epoch": 3.8345864661654137, "percentage": 38.35, "elapsed_time": "1:58:38", "remaining_time": "3:10:44"}
215
+ {"current_steps": 2050, "total_steps": 5320, "loss": 1.1635, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.7964503967959705e-06, "epoch": 3.8533834586466167, "percentage": 38.53, "elapsed_time": "1:59:11", "remaining_time": "3:10:06"}
216
+ {"current_steps": 2060, "total_steps": 5320, "loss": 1.1307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.768760360166471e-06, "epoch": 3.8721804511278197, "percentage": 38.72, "elapsed_time": "1:59:40", "remaining_time": "3:09:23"}
217
+ {"current_steps": 2070, "total_steps": 5320, "loss": 1.1858, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.741008177171995e-06, "epoch": 3.8909774436090228, "percentage": 38.91, "elapsed_time": "2:00:10", "remaining_time": "3:08:41"}
218
+ {"current_steps": 2080, "total_steps": 5320, "loss": 1.1718, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.713194822900707e-06, "epoch": 3.909774436090226, "percentage": 39.1, "elapsed_time": "2:00:40", "remaining_time": "3:07:58"}
219
+ {"current_steps": 2090, "total_steps": 5320, "loss": 1.1452, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.6853212745900585e-06, "epoch": 3.928571428571429, "percentage": 39.29, "elapsed_time": "2:01:13", "remaining_time": "3:07:20"}
220
+ {"current_steps": 2100, "total_steps": 5320, "loss": 1.1107, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.657388511592453e-06, "epoch": 3.9473684210526314, "percentage": 39.47, "elapsed_time": "2:01:43", "remaining_time": "3:06:39"}
221
+ {"current_steps": 2110, "total_steps": 5320, "loss": 1.1833, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.62939751534083e-06, "epoch": 3.9661654135338344, "percentage": 39.66, "elapsed_time": "2:02:11", "remaining_time": "3:05:53"}
222
+ {"current_steps": 2120, "total_steps": 5320, "loss": 1.1474, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.601349269314188e-06, "epoch": 3.9849624060150375, "percentage": 39.85, "elapsed_time": "2:02:40", "remaining_time": "3:05:10"}
223
+ {"current_steps": 2130, "total_steps": 5320, "loss": 1.1439, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.573244759003033e-06, "epoch": 4.003759398496241, "percentage": 40.04, "elapsed_time": "2:03:10", "remaining_time": "3:04:28"}
224
+ {"current_steps": 2140, "total_steps": 5320, "loss": 1.1256, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.545084971874738e-06, "epoch": 4.022556390977444, "percentage": 40.23, "elapsed_time": "2:03:43", "remaining_time": "3:03:51"}
225
+ {"current_steps": 2150, "total_steps": 5320, "loss": 1.1084, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.516870897338864e-06, "epoch": 4.041353383458647, "percentage": 40.41, "elapsed_time": "2:04:14", "remaining_time": "3:03:10"}
226
+ {"current_steps": 2160, "total_steps": 5320, "loss": 1.1515, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.488603526712391e-06, "epoch": 4.06015037593985, "percentage": 40.6, "elapsed_time": "2:04:46", "remaining_time": "3:02:32"}
227
+ {"current_steps": 2170, "total_steps": 5320, "loss": 1.1498, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.46028385318488e-06, "epoch": 4.078947368421052, "percentage": 40.79, "elapsed_time": "2:05:15", "remaining_time": "3:01:49"}
228
+ {"current_steps": 2180, "total_steps": 5320, "loss": 1.1487, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.431912871783587e-06, "epoch": 4.097744360902255, "percentage": 40.98, "elapsed_time": "2:05:46", "remaining_time": "3:01:10"}
229
+ {"current_steps": 2190, "total_steps": 5320, "loss": 1.1616, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.4034915793385e-06, "epoch": 4.116541353383458, "percentage": 41.17, "elapsed_time": "2:06:16", "remaining_time": "3:00:28"}
230
+ {"current_steps": 2200, "total_steps": 5320, "loss": 1.1286, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.3750209744473105e-06, "epoch": 4.135338345864661, "percentage": 41.35, "elapsed_time": "2:06:46", "remaining_time": "2:59:48"}
231
+ {"current_steps": 2200, "total_steps": 5320, "loss": null, "eval_loss": 1.2525601387023926, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.135338345864661, "percentage": 41.35, "elapsed_time": "2:06:46", "remaining_time": "2:59:48"}
232
+ {"current_steps": 2210, "total_steps": 5320, "loss": 1.1263, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.346502057440327e-06, "epoch": 4.154135338345864, "percentage": 41.54, "elapsed_time": "2:08:48", "remaining_time": "3:01:15"}
233
+ {"current_steps": 2220, "total_steps": 5320, "loss": 1.0954, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.3179358303453386e-06, "epoch": 4.172932330827067, "percentage": 41.73, "elapsed_time": "2:09:19", "remaining_time": "3:00:34"}
234
+ {"current_steps": 2230, "total_steps": 5320, "loss": 1.0753, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.289323296852393e-06, "epoch": 4.19172932330827, "percentage": 41.92, "elapsed_time": "2:09:45", "remaining_time": "2:59:48"}
235
+ {"current_steps": 2240, "total_steps": 5320, "loss": 1.1932, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.260665462278544e-06, "epoch": 4.2105263157894735, "percentage": 42.11, "elapsed_time": "2:10:17", "remaining_time": "2:59:08"}
236
+ {"current_steps": 2250, "total_steps": 5320, "loss": 1.1761, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.231963333532516e-06, "epoch": 4.2293233082706765, "percentage": 42.29, "elapsed_time": "2:10:49", "remaining_time": "2:58:29"}
237
+ {"current_steps": 2260, "total_steps": 5320, "loss": 1.1748, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.203217919079343e-06, "epoch": 4.2481203007518795, "percentage": 42.48, "elapsed_time": "2:11:21", "remaining_time": "2:57:51"}
238
+ {"current_steps": 2270, "total_steps": 5320, "loss": 1.1325, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.17443022890492e-06, "epoch": 4.2669172932330826, "percentage": 42.67, "elapsed_time": "2:11:50", "remaining_time": "2:57:09"}
239
+ {"current_steps": 2280, "total_steps": 5320, "loss": 1.1644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.145601274480521e-06, "epoch": 4.285714285714286, "percentage": 42.86, "elapsed_time": "2:12:20", "remaining_time": "2:56:27"}
240
+ {"current_steps": 2290, "total_steps": 5320, "loss": 1.2016, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.116732068727271e-06, "epoch": 4.304511278195489, "percentage": 43.05, "elapsed_time": "2:12:49", "remaining_time": "2:55:44"}
241
+ {"current_steps": 2300, "total_steps": 5320, "loss": 1.1828, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.08782362598054e-06, "epoch": 4.323308270676692, "percentage": 43.23, "elapsed_time": "2:13:19", "remaining_time": "2:55:03"}
242
+ {"current_steps": 2310, "total_steps": 5320, "loss": 1.1837, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.058876961954308e-06, "epoch": 4.342105263157895, "percentage": 43.42, "elapsed_time": "2:13:50", "remaining_time": "2:54:23"}
243
+ {"current_steps": 2320, "total_steps": 5320, "loss": 1.1689, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.029893093705492e-06, "epoch": 4.360902255639098, "percentage": 43.61, "elapsed_time": "2:14:19", "remaining_time": "2:53:41"}
244
+ {"current_steps": 2330, "total_steps": 5320, "loss": 1.1043, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.0008730395981905e-06, "epoch": 4.379699248120301, "percentage": 43.8, "elapsed_time": "2:14:49", "remaining_time": "2:53:00"}
245
+ {"current_steps": 2340, "total_steps": 5320, "loss": 1.162, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.971817819267914e-06, "epoch": 4.398496240601504, "percentage": 43.98, "elapsed_time": "2:15:20", "remaining_time": "2:52:20"}
246
+ {"current_steps": 2350, "total_steps": 5320, "loss": 1.1986, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.9427284535857585e-06, "epoch": 4.417293233082707, "percentage": 44.17, "elapsed_time": "2:15:47", "remaining_time": "2:51:37"}
247
+ {"current_steps": 2360, "total_steps": 5320, "loss": 1.1752, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.9136059646225375e-06, "epoch": 4.43609022556391, "percentage": 44.36, "elapsed_time": "2:16:18", "remaining_time": "2:50:57"}
248
+ {"current_steps": 2370, "total_steps": 5320, "loss": 1.1221, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.884451375612865e-06, "epoch": 4.454887218045113, "percentage": 44.55, "elapsed_time": "2:16:50", "remaining_time": "2:50:19"}
249
+ {"current_steps": 2380, "total_steps": 5320, "loss": 1.0907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.855265710919211e-06, "epoch": 4.473684210526316, "percentage": 44.74, "elapsed_time": "2:17:20", "remaining_time": "2:49:39"}
250
+ {"current_steps": 2390, "total_steps": 5320, "loss": 1.1429, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.826049995995905e-06, "epoch": 4.492481203007519, "percentage": 44.92, "elapsed_time": "2:17:53", "remaining_time": "2:49:02"}
251
+ {"current_steps": 2400, "total_steps": 5320, "loss": 1.1467, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.796805257353109e-06, "epoch": 4.511278195488722, "percentage": 45.11, "elapsed_time": "2:18:24", "remaining_time": "2:48:24"}
252
+ {"current_steps": 2400, "total_steps": 5320, "loss": null, "eval_loss": 1.255226731300354, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.511278195488722, "percentage": 45.11, "elapsed_time": "2:18:24", "remaining_time": "2:48:24"}
253
+ {"current_steps": 2410, "total_steps": 5320, "loss": 1.0982, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.767532522520746e-06, "epoch": 4.530075187969925, "percentage": 45.3, "elapsed_time": "2:20:30", "remaining_time": "2:49:39"}
254
+ {"current_steps": 2420, "total_steps": 5320, "loss": 1.1655, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.738232820012407e-06, "epoch": 4.548872180451128, "percentage": 45.49, "elapsed_time": "2:21:01", "remaining_time": "2:48:59"}
255
+ {"current_steps": 2430, "total_steps": 5320, "loss": 1.1364, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.7089071792892e-06, "epoch": 4.567669172932331, "percentage": 45.68, "elapsed_time": "2:21:30", "remaining_time": "2:48:18"}
256
+ {"current_steps": 2440, "total_steps": 5320, "loss": 1.0997, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.679556630723592e-06, "epoch": 4.586466165413534, "percentage": 45.86, "elapsed_time": "2:22:02", "remaining_time": "2:47:38"}
257
+ {"current_steps": 2450, "total_steps": 5320, "loss": 1.0726, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.6501822055631976e-06, "epoch": 4.605263157894737, "percentage": 46.05, "elapsed_time": "2:22:34", "remaining_time": "2:47:01"}
258
+ {"current_steps": 2460, "total_steps": 5320, "loss": 1.0923, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.620784935894548e-06, "epoch": 4.62406015037594, "percentage": 46.24, "elapsed_time": "2:23:03", "remaining_time": "2:46:19"}
259
+ {"current_steps": 2470, "total_steps": 5320, "loss": 1.1551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.591365854606829e-06, "epoch": 4.642857142857143, "percentage": 46.43, "elapsed_time": "2:23:35", "remaining_time": "2:45:41"}
260
+ {"current_steps": 2480, "total_steps": 5320, "loss": 1.1109, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.561925995355595e-06, "epoch": 4.661654135338345, "percentage": 46.62, "elapsed_time": "2:24:04", "remaining_time": "2:44:59"}
261
+ {"current_steps": 2490, "total_steps": 5320, "loss": 1.0835, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.532466392526439e-06, "epoch": 4.680451127819548, "percentage": 46.8, "elapsed_time": "2:24:33", "remaining_time": "2:44:18"}
262
+ {"current_steps": 2500, "total_steps": 5320, "loss": 1.109, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.5029880811986546e-06, "epoch": 4.6992481203007515, "percentage": 46.99, "elapsed_time": "2:25:00", "remaining_time": "2:43:34"}
263
+ {"current_steps": 2510, "total_steps": 5320, "loss": 1.1409, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.4734920971088766e-06, "epoch": 4.7180451127819545, "percentage": 47.18, "elapsed_time": "2:25:32", "remaining_time": "2:42:56"}
264
+ {"current_steps": 2520, "total_steps": 5320, "loss": 1.171, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.443979476614674e-06, "epoch": 4.7368421052631575, "percentage": 47.37, "elapsed_time": "2:26:04", "remaining_time": "2:42:18"}
265
+ {"current_steps": 2530, "total_steps": 5320, "loss": 1.1815, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.4144512566581495e-06, "epoch": 4.7556390977443606, "percentage": 47.56, "elapsed_time": "2:26:34", "remaining_time": "2:41:37"}
266
+ {"current_steps": 2540, "total_steps": 5320, "loss": 1.1535, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.384908474729501e-06, "epoch": 4.774436090225564, "percentage": 47.74, "elapsed_time": "2:27:05", "remaining_time": "2:40:59"}
267
+ {"current_steps": 2550, "total_steps": 5320, "loss": 1.1426, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.3553521688305655e-06, "epoch": 4.793233082706767, "percentage": 47.93, "elapsed_time": "2:27:37", "remaining_time": "2:40:22"}
268
+ {"current_steps": 2560, "total_steps": 5320, "loss": 1.1248, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.325783377438357e-06, "epoch": 4.81203007518797, "percentage": 48.12, "elapsed_time": "2:28:07", "remaining_time": "2:39:42"}
269
+ {"current_steps": 2570, "total_steps": 5320, "loss": 1.079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.296203139468572e-06, "epoch": 4.830827067669173, "percentage": 48.31, "elapsed_time": "2:28:37", "remaining_time": "2:39:01"}
270
+ {"current_steps": 2580, "total_steps": 5320, "loss": 1.0826, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.266612494239088e-06, "epoch": 4.849624060150376, "percentage": 48.5, "elapsed_time": "2:29:06", "remaining_time": "2:38:21"}
271
+ {"current_steps": 2590, "total_steps": 5320, "loss": 1.1501, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.23701248143345e-06, "epoch": 4.868421052631579, "percentage": 48.68, "elapsed_time": "2:29:36", "remaining_time": "2:37:41"}
272
+ {"current_steps": 2600, "total_steps": 5320, "loss": 1.1357, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.207404141064334e-06, "epoch": 4.887218045112782, "percentage": 48.87, "elapsed_time": "2:30:07", "remaining_time": "2:37:02"}
273
+ {"current_steps": 2600, "total_steps": 5320, "loss": null, "eval_loss": 1.2556278705596924, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.887218045112782, "percentage": 48.87, "elapsed_time": "2:30:07", "remaining_time": "2:37:02"}
274
+ {"current_steps": 2610, "total_steps": 5320, "loss": 1.1404, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.177788513437013e-06, "epoch": 4.906015037593985, "percentage": 49.06, "elapsed_time": "2:32:07", "remaining_time": "2:37:57"}
275
+ {"current_steps": 2620, "total_steps": 5320, "loss": 1.141, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.148166639112799e-06, "epoch": 4.924812030075188, "percentage": 49.25, "elapsed_time": "2:32:35", "remaining_time": "2:37:15"}
276
+ {"current_steps": 2630, "total_steps": 5320, "loss": 1.1371, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.118539558872489e-06, "epoch": 4.943609022556391, "percentage": 49.44, "elapsed_time": "2:33:05", "remaining_time": "2:36:35"}
277
+ {"current_steps": 2640, "total_steps": 5320, "loss": 1.1056, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.088908313679788e-06, "epoch": 4.962406015037594, "percentage": 49.62, "elapsed_time": "2:33:34", "remaining_time": "2:35:54"}
278
+ {"current_steps": 2650, "total_steps": 5320, "loss": 1.1125, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.059273944644742e-06, "epoch": 4.981203007518797, "percentage": 49.81, "elapsed_time": "2:34:04", "remaining_time": "2:35:14"}
279
+ {"current_steps": 2660, "total_steps": 5320, "loss": 1.1029, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.029637492987153e-06, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "2:34:32", "remaining_time": "2:34:32"}
280
+ {"current_steps": 2670, "total_steps": 5320, "loss": 1.1127, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-06, "epoch": 5.018796992481203, "percentage": 50.19, "elapsed_time": "2:35:06", "remaining_time": "2:33:57"}
281
+ {"current_steps": 2680, "total_steps": 5320, "loss": 1.1489, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.970362507012848e-06, "epoch": 5.037593984962406, "percentage": 50.38, "elapsed_time": "2:35:39", "remaining_time": "2:33:19"}
282
+ {"current_steps": 2690, "total_steps": 5320, "loss": 1.1115, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.940726055355259e-06, "epoch": 5.056390977443609, "percentage": 50.56, "elapsed_time": "2:36:08", "remaining_time": "2:32:39"}
283
+ {"current_steps": 2700, "total_steps": 5320, "loss": 1.1115, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.911091686320213e-06, "epoch": 5.075187969924812, "percentage": 50.75, "elapsed_time": "2:36:38", "remaining_time": "2:31:59"}
284
+ {"current_steps": 2710, "total_steps": 5320, "loss": 1.0303, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.881460441127513e-06, "epoch": 5.093984962406015, "percentage": 50.94, "elapsed_time": "2:37:07", "remaining_time": "2:31:19"}
285
+ {"current_steps": 2720, "total_steps": 5320, "loss": 1.1397, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8518333608872015e-06, "epoch": 5.112781954887218, "percentage": 51.13, "elapsed_time": "2:37:38", "remaining_time": "2:30:41"}
286
+ {"current_steps": 2730, "total_steps": 5320, "loss": 1.1045, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.822211486562989e-06, "epoch": 5.131578947368421, "percentage": 51.32, "elapsed_time": "2:38:07", "remaining_time": "2:30:00"}
287
+ {"current_steps": 2740, "total_steps": 5320, "loss": 1.12, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.792595858935668e-06, "epoch": 5.150375939849624, "percentage": 51.5, "elapsed_time": "2:38:38", "remaining_time": "2:29:22"}
288
+ {"current_steps": 2750, "total_steps": 5320, "loss": 1.0694, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7629875185665505e-06, "epoch": 5.169172932330827, "percentage": 51.69, "elapsed_time": "2:39:10", "remaining_time": "2:28:44"}
289
+ {"current_steps": 2760, "total_steps": 5320, "loss": 1.0619, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.733387505760913e-06, "epoch": 5.18796992481203, "percentage": 51.88, "elapsed_time": "2:39:43", "remaining_time": "2:28:08"}
290
+ {"current_steps": 2770, "total_steps": 5320, "loss": 1.0368, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.703796860531429e-06, "epoch": 5.206766917293233, "percentage": 52.07, "elapsed_time": "2:40:15", "remaining_time": "2:27:31"}
291
+ {"current_steps": 2780, "total_steps": 5320, "loss": 1.0728, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.674216622561645e-06, "epoch": 5.225563909774436, "percentage": 52.26, "elapsed_time": "2:40:45", "remaining_time": "2:26:52"}
292
+ {"current_steps": 2790, "total_steps": 5320, "loss": 1.1404, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.644647831169435e-06, "epoch": 5.2443609022556394, "percentage": 52.44, "elapsed_time": "2:41:14", "remaining_time": "2:26:13"}
293
+ {"current_steps": 2800, "total_steps": 5320, "loss": 1.0742, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6150915252705005e-06, "epoch": 5.2631578947368425, "percentage": 52.63, "elapsed_time": "2:41:42", "remaining_time": "2:25:32"}
294
+ {"current_steps": 2800, "total_steps": 5320, "loss": null, "eval_loss": 1.2630378007888794, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.2631578947368425, "percentage": 52.63, "elapsed_time": "2:41:42", "remaining_time": "2:25:32"}
295
+ {"current_steps": 2800, "total_steps": 5320, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.2631578947368425, "percentage": 52.63, "elapsed_time": "2:41:42", "remaining_time": "2:25:32"}
296
+ {"current_steps": 94, "total_steps": 94, "loss": null, "eval_loss": 1.2493139505386353, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.2631578947368425, "percentage": 100.0, "elapsed_time": "2:44:55", "remaining_time": "0:00:00"}
llama3_8b_peft/cnn_dailymail/trainer_state.json ADDED
@@ -0,0 +1,2102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.2493139505386353,
3
+ "best_model_checkpoint": "ckpt/llama3_8b_fuze27_no_sys/cnn_dailymail_no_sys/checkpoint-2000",
4
+ "epoch": 5.2631578947368425,
5
+ "eval_steps": 200,
6
+ "global_step": 2800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.018796992481203006,
13
+ "grad_norm": 0.9447252154350281,
14
+ "learning_rate": 5e-06,
15
+ "loss": 1.8478,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.03759398496240601,
20
+ "grad_norm": 1.001190185546875,
21
+ "learning_rate": 1e-05,
22
+ "loss": 1.8363,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.05639097744360902,
27
+ "grad_norm": 1.1996679306030273,
28
+ "learning_rate": 9.999912161129377e-06,
29
+ "loss": 1.7987,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.07518796992481203,
34
+ "grad_norm": 1.1520044803619385,
35
+ "learning_rate": 9.999648647603774e-06,
36
+ "loss": 1.6542,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.09398496240601503,
41
+ "grad_norm": 1.017522931098938,
42
+ "learning_rate": 9.999209468681885e-06,
43
+ "loss": 1.589,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.11278195488721804,
48
+ "grad_norm": 0.9699854254722595,
49
+ "learning_rate": 9.998594639794502e-06,
50
+ "loss": 1.4517,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.13157894736842105,
55
+ "grad_norm": 1.0790051221847534,
56
+ "learning_rate": 9.997804182543973e-06,
57
+ "loss": 1.368,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.15037593984962405,
62
+ "grad_norm": 0.8582977652549744,
63
+ "learning_rate": 9.996838124703448e-06,
64
+ "loss": 1.3354,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.16917293233082706,
69
+ "grad_norm": 0.7901042103767395,
70
+ "learning_rate": 9.995696500215899e-06,
71
+ "loss": 1.3313,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.18796992481203006,
76
+ "grad_norm": 0.8881024122238159,
77
+ "learning_rate": 9.994379349192927e-06,
78
+ "loss": 1.3679,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.20676691729323307,
83
+ "grad_norm": 0.9176166653633118,
84
+ "learning_rate": 9.992886717913358e-06,
85
+ "loss": 1.354,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.22556390977443608,
90
+ "grad_norm": 1.022005558013916,
91
+ "learning_rate": 9.991218658821609e-06,
92
+ "loss": 1.3123,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.24436090225563908,
97
+ "grad_norm": 0.8615936636924744,
98
+ "learning_rate": 9.989375230525849e-06,
99
+ "loss": 1.3205,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.2631578947368421,
104
+ "grad_norm": 0.9969698190689087,
105
+ "learning_rate": 9.987356497795944e-06,
106
+ "loss": 1.3522,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.2819548872180451,
111
+ "grad_norm": 1.0495631694793701,
112
+ "learning_rate": 9.985162531561174e-06,
113
+ "loss": 1.2833,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.3007518796992481,
118
+ "grad_norm": 0.9491264224052429,
119
+ "learning_rate": 9.982793408907747e-06,
120
+ "loss": 1.328,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.31954887218045114,
125
+ "grad_norm": 1.0747370719909668,
126
+ "learning_rate": 9.980249213076085e-06,
127
+ "loss": 1.3121,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.3383458646616541,
132
+ "grad_norm": 1.0547435283660889,
133
+ "learning_rate": 9.977530033457906e-06,
134
+ "loss": 1.3388,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.35714285714285715,
139
+ "grad_norm": 0.9469263553619385,
140
+ "learning_rate": 9.97463596559307e-06,
141
+ "loss": 1.3377,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.37593984962406013,
146
+ "grad_norm": 0.9396130442619324,
147
+ "learning_rate": 9.971567111166246e-06,
148
+ "loss": 1.2614,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.37593984962406013,
153
+ "eval_loss": 1.3072842359542847,
154
+ "eval_runtime": 93.0091,
155
+ "eval_samples_per_second": 16.127,
156
+ "eval_steps_per_second": 1.011,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.39473684210526316,
161
+ "grad_norm": 1.0297470092773438,
162
+ "learning_rate": 9.968323578003312e-06,
163
+ "loss": 1.3147,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 0.41353383458646614,
168
+ "grad_norm": 1.037048101425171,
169
+ "learning_rate": 9.964905480067585e-06,
170
+ "loss": 1.2929,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 0.4323308270676692,
175
+ "grad_norm": 0.8472989201545715,
176
+ "learning_rate": 9.961312937455812e-06,
177
+ "loss": 1.3393,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.45112781954887216,
182
+ "grad_norm": 0.9269481301307678,
183
+ "learning_rate": 9.957546076393944e-06,
184
+ "loss": 1.3135,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 0.4699248120300752,
189
+ "grad_norm": 1.0031790733337402,
190
+ "learning_rate": 9.95360502923271e-06,
191
+ "loss": 1.2597,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 0.48872180451127817,
196
+ "grad_norm": 1.0126962661743164,
197
+ "learning_rate": 9.949489934442966e-06,
198
+ "loss": 1.2992,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 0.5075187969924813,
203
+ "grad_norm": 0.9087000489234924,
204
+ "learning_rate": 9.945200936610821e-06,
205
+ "loss": 1.2996,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 0.5263157894736842,
210
+ "grad_norm": 0.9785706996917725,
211
+ "learning_rate": 9.940738186432565e-06,
212
+ "loss": 1.3132,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 0.5451127819548872,
217
+ "grad_norm": 1.2311068773269653,
218
+ "learning_rate": 9.936101840709373e-06,
219
+ "loss": 1.3046,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 0.5639097744360902,
224
+ "grad_norm": 1.1493617296218872,
225
+ "learning_rate": 9.931292062341793e-06,
226
+ "loss": 1.2913,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.5827067669172933,
231
+ "grad_norm": 1.0180079936981201,
232
+ "learning_rate": 9.926309020324025e-06,
233
+ "loss": 1.3397,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 0.6015037593984962,
238
+ "grad_norm": 1.0271568298339844,
239
+ "learning_rate": 9.921152889737985e-06,
240
+ "loss": 1.2682,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 0.6203007518796992,
245
+ "grad_norm": 1.0226733684539795,
246
+ "learning_rate": 9.915823851747143e-06,
247
+ "loss": 1.3243,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 0.6390977443609023,
252
+ "grad_norm": 1.213356852531433,
253
+ "learning_rate": 9.910322093590177e-06,
254
+ "loss": 1.2911,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 0.6578947368421053,
259
+ "grad_norm": 1.0010030269622803,
260
+ "learning_rate": 9.90464780857437e-06,
261
+ "loss": 1.2426,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 0.6766917293233082,
266
+ "grad_norm": 1.0011094808578491,
267
+ "learning_rate": 9.898801196068839e-06,
268
+ "loss": 1.2459,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 0.6954887218045113,
273
+ "grad_norm": 1.1239278316497803,
274
+ "learning_rate": 9.892782461497521e-06,
275
+ "loss": 1.3258,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 0.7142857142857143,
280
+ "grad_norm": 1.0707839727401733,
281
+ "learning_rate": 9.886591816331953e-06,
282
+ "loss": 1.2751,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 0.7330827067669173,
287
+ "grad_norm": 1.0116987228393555,
288
+ "learning_rate": 9.880229478083849e-06,
289
+ "loss": 1.2523,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 0.7518796992481203,
294
+ "grad_norm": 1.1805694103240967,
295
+ "learning_rate": 9.87369567029745e-06,
296
+ "loss": 1.3527,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 0.7518796992481203,
301
+ "eval_loss": 1.28571617603302,
302
+ "eval_runtime": 92.9385,
303
+ "eval_samples_per_second": 16.14,
304
+ "eval_steps_per_second": 1.011,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 0.7706766917293233,
309
+ "grad_norm": 1.2910244464874268,
310
+ "learning_rate": 9.866990622541677e-06,
311
+ "loss": 1.2655,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 0.7894736842105263,
316
+ "grad_norm": 1.024816870689392,
317
+ "learning_rate": 9.860114570402055e-06,
318
+ "loss": 1.2699,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 0.8082706766917294,
323
+ "grad_norm": 1.062069058418274,
324
+ "learning_rate": 9.853067755472447e-06,
325
+ "loss": 1.3128,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 0.8270676691729323,
330
+ "grad_norm": 1.0718345642089844,
331
+ "learning_rate": 9.845850425346563e-06,
332
+ "loss": 1.2984,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 0.8458646616541353,
337
+ "grad_norm": 1.06083083152771,
338
+ "learning_rate": 9.838462833609249e-06,
339
+ "loss": 1.3103,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 0.8646616541353384,
344
+ "grad_norm": 1.0429376363754272,
345
+ "learning_rate": 9.830905239827592e-06,
346
+ "loss": 1.2631,
347
+ "step": 460
348
+ },
349
+ {
350
+ "epoch": 0.8834586466165414,
351
+ "grad_norm": 1.2880390882492065,
352
+ "learning_rate": 9.823177909541795e-06,
353
+ "loss": 1.2547,
354
+ "step": 470
355
+ },
356
+ {
357
+ "epoch": 0.9022556390977443,
358
+ "grad_norm": 1.0343599319458008,
359
+ "learning_rate": 9.815281114255841e-06,
360
+ "loss": 1.2982,
361
+ "step": 480
362
+ },
363
+ {
364
+ "epoch": 0.9210526315789473,
365
+ "grad_norm": 1.11343252658844,
366
+ "learning_rate": 9.807215131427966e-06,
367
+ "loss": 1.2738,
368
+ "step": 490
369
+ },
370
+ {
371
+ "epoch": 0.9398496240601504,
372
+ "grad_norm": 1.1720097064971924,
373
+ "learning_rate": 9.798980244460892e-06,
374
+ "loss": 1.2883,
375
+ "step": 500
376
+ },
377
+ {
378
+ "epoch": 0.9586466165413534,
379
+ "grad_norm": 1.292350172996521,
380
+ "learning_rate": 9.790576742691895e-06,
381
+ "loss": 1.2624,
382
+ "step": 510
383
+ },
384
+ {
385
+ "epoch": 0.9774436090225563,
386
+ "grad_norm": 1.0651817321777344,
387
+ "learning_rate": 9.782004921382612e-06,
388
+ "loss": 1.2346,
389
+ "step": 520
390
+ },
391
+ {
392
+ "epoch": 0.9962406015037594,
393
+ "grad_norm": 1.3379793167114258,
394
+ "learning_rate": 9.773265081708687e-06,
395
+ "loss": 1.2918,
396
+ "step": 530
397
+ },
398
+ {
399
+ "epoch": 1.0150375939849625,
400
+ "grad_norm": 1.0775865316390991,
401
+ "learning_rate": 9.764357530749178e-06,
402
+ "loss": 1.2432,
403
+ "step": 540
404
+ },
405
+ {
406
+ "epoch": 1.0338345864661653,
407
+ "grad_norm": 1.080871820449829,
408
+ "learning_rate": 9.755282581475769e-06,
409
+ "loss": 1.3564,
410
+ "step": 550
411
+ },
412
+ {
413
+ "epoch": 1.0526315789473684,
414
+ "grad_norm": 1.0247849225997925,
415
+ "learning_rate": 9.74604055274178e-06,
416
+ "loss": 1.2317,
417
+ "step": 560
418
+ },
419
+ {
420
+ "epoch": 1.0714285714285714,
421
+ "grad_norm": 1.1171594858169556,
422
+ "learning_rate": 9.736631769270958e-06,
423
+ "loss": 1.2586,
424
+ "step": 570
425
+ },
426
+ {
427
+ "epoch": 1.0902255639097744,
428
+ "grad_norm": 1.033004641532898,
429
+ "learning_rate": 9.727056561646067e-06,
430
+ "loss": 1.2914,
431
+ "step": 580
432
+ },
433
+ {
434
+ "epoch": 1.1090225563909775,
435
+ "grad_norm": 1.1016192436218262,
436
+ "learning_rate": 9.717315266297277e-06,
437
+ "loss": 1.1981,
438
+ "step": 590
439
+ },
440
+ {
441
+ "epoch": 1.1278195488721805,
442
+ "grad_norm": 1.187591552734375,
443
+ "learning_rate": 9.707408225490343e-06,
444
+ "loss": 1.239,
445
+ "step": 600
446
+ },
447
+ {
448
+ "epoch": 1.1278195488721805,
449
+ "eval_loss": 1.274794340133667,
450
+ "eval_runtime": 92.9901,
451
+ "eval_samples_per_second": 16.131,
452
+ "eval_steps_per_second": 1.011,
453
+ "step": 600
454
+ },
455
+ {
456
+ "epoch": 1.1466165413533835,
457
+ "grad_norm": 1.2362220287322998,
458
+ "learning_rate": 9.697335787314573e-06,
459
+ "loss": 1.3017,
460
+ "step": 610
461
+ },
462
+ {
463
+ "epoch": 1.1654135338345863,
464
+ "grad_norm": 1.1133874654769897,
465
+ "learning_rate": 9.687098305670606e-06,
466
+ "loss": 1.2571,
467
+ "step": 620
468
+ },
469
+ {
470
+ "epoch": 1.1842105263157894,
471
+ "grad_norm": 1.3375511169433594,
472
+ "learning_rate": 9.676696140257969e-06,
473
+ "loss": 1.3266,
474
+ "step": 630
475
+ },
476
+ {
477
+ "epoch": 1.2030075187969924,
478
+ "grad_norm": 1.292880654335022,
479
+ "learning_rate": 9.66612965656245e-06,
480
+ "loss": 1.2692,
481
+ "step": 640
482
+ },
483
+ {
484
+ "epoch": 1.2218045112781954,
485
+ "grad_norm": 1.320142149925232,
486
+ "learning_rate": 9.655399225843244e-06,
487
+ "loss": 1.2982,
488
+ "step": 650
489
+ },
490
+ {
491
+ "epoch": 1.2406015037593985,
492
+ "grad_norm": 1.1245112419128418,
493
+ "learning_rate": 9.644505225119922e-06,
494
+ "loss": 1.2372,
495
+ "step": 660
496
+ },
497
+ {
498
+ "epoch": 1.2593984962406015,
499
+ "grad_norm": 1.157261848449707,
500
+ "learning_rate": 9.633448037159167e-06,
501
+ "loss": 1.2541,
502
+ "step": 670
503
+ },
504
+ {
505
+ "epoch": 1.2781954887218046,
506
+ "grad_norm": 1.1512020826339722,
507
+ "learning_rate": 9.622228050461345e-06,
508
+ "loss": 1.2387,
509
+ "step": 680
510
+ },
511
+ {
512
+ "epoch": 1.2969924812030076,
513
+ "grad_norm": 1.450369954109192,
514
+ "learning_rate": 9.610845659246833e-06,
515
+ "loss": 1.2468,
516
+ "step": 690
517
+ },
518
+ {
519
+ "epoch": 1.3157894736842106,
520
+ "grad_norm": 1.3643982410430908,
521
+ "learning_rate": 9.599301263442194e-06,
522
+ "loss": 1.2913,
523
+ "step": 700
524
+ },
525
+ {
526
+ "epoch": 1.3345864661654137,
527
+ "grad_norm": 1.3036562204360962,
528
+ "learning_rate": 9.587595268666099e-06,
529
+ "loss": 1.2588,
530
+ "step": 710
531
+ },
532
+ {
533
+ "epoch": 1.3533834586466165,
534
+ "grad_norm": 1.476068139076233,
535
+ "learning_rate": 9.575728086215093e-06,
536
+ "loss": 1.2759,
537
+ "step": 720
538
+ },
539
+ {
540
+ "epoch": 1.3721804511278195,
541
+ "grad_norm": 1.300525426864624,
542
+ "learning_rate": 9.56370013304914e-06,
543
+ "loss": 1.2981,
544
+ "step": 730
545
+ },
546
+ {
547
+ "epoch": 1.3909774436090225,
548
+ "grad_norm": 1.4140195846557617,
549
+ "learning_rate": 9.551511831776966e-06,
550
+ "loss": 1.2511,
551
+ "step": 740
552
+ },
553
+ {
554
+ "epoch": 1.4097744360902256,
555
+ "grad_norm": 1.386645793914795,
556
+ "learning_rate": 9.53916361064122e-06,
557
+ "loss": 1.2008,
558
+ "step": 750
559
+ },
560
+ {
561
+ "epoch": 1.4285714285714286,
562
+ "grad_norm": 1.295055866241455,
563
+ "learning_rate": 9.526655903503423e-06,
564
+ "loss": 1.2803,
565
+ "step": 760
566
+ },
567
+ {
568
+ "epoch": 1.4473684210526316,
569
+ "grad_norm": 1.4234346151351929,
570
+ "learning_rate": 9.513989149828718e-06,
571
+ "loss": 1.2487,
572
+ "step": 770
573
+ },
574
+ {
575
+ "epoch": 1.4661654135338344,
576
+ "grad_norm": 1.3746626377105713,
577
+ "learning_rate": 9.501163794670445e-06,
578
+ "loss": 1.1717,
579
+ "step": 780
580
+ },
581
+ {
582
+ "epoch": 1.4849624060150375,
583
+ "grad_norm": 1.3255406618118286,
584
+ "learning_rate": 9.488180288654485e-06,
585
+ "loss": 1.2217,
586
+ "step": 790
587
+ },
588
+ {
589
+ "epoch": 1.5037593984962405,
590
+ "grad_norm": 1.2310757637023926,
591
+ "learning_rate": 9.475039087963443e-06,
592
+ "loss": 1.2647,
593
+ "step": 800
594
+ },
595
+ {
596
+ "epoch": 1.5037593984962405,
597
+ "eval_loss": 1.2675856351852417,
598
+ "eval_runtime": 93.6094,
599
+ "eval_samples_per_second": 16.024,
600
+ "eval_steps_per_second": 1.004,
601
+ "step": 800
602
+ },
603
+ {
604
+ "epoch": 1.5225563909774436,
605
+ "grad_norm": 1.314813494682312,
606
+ "learning_rate": 9.461740654320608e-06,
607
+ "loss": 1.2283,
608
+ "step": 810
609
+ },
610
+ {
611
+ "epoch": 1.5413533834586466,
612
+ "grad_norm": 1.353016972541809,
613
+ "learning_rate": 9.448285454973739e-06,
614
+ "loss": 1.2758,
615
+ "step": 820
616
+ },
617
+ {
618
+ "epoch": 1.5601503759398496,
619
+ "grad_norm": 1.3047956228256226,
620
+ "learning_rate": 9.434673962678638e-06,
621
+ "loss": 1.2634,
622
+ "step": 830
623
+ },
624
+ {
625
+ "epoch": 1.5789473684210527,
626
+ "grad_norm": 1.3261934518814087,
627
+ "learning_rate": 9.420906655682553e-06,
628
+ "loss": 1.2268,
629
+ "step": 840
630
+ },
631
+ {
632
+ "epoch": 1.5977443609022557,
633
+ "grad_norm": 1.2587268352508545,
634
+ "learning_rate": 9.40698401770736e-06,
635
+ "loss": 1.3011,
636
+ "step": 850
637
+ },
638
+ {
639
+ "epoch": 1.6165413533834587,
640
+ "grad_norm": 1.2187191247940063,
641
+ "learning_rate": 9.392906537932582e-06,
642
+ "loss": 1.2172,
643
+ "step": 860
644
+ },
645
+ {
646
+ "epoch": 1.6353383458646618,
647
+ "grad_norm": 1.2722861766815186,
648
+ "learning_rate": 9.378674710978185e-06,
649
+ "loss": 1.2692,
650
+ "step": 870
651
+ },
652
+ {
653
+ "epoch": 1.6541353383458648,
654
+ "grad_norm": 1.3142369985580444,
655
+ "learning_rate": 9.364289036887214e-06,
656
+ "loss": 1.2359,
657
+ "step": 880
658
+ },
659
+ {
660
+ "epoch": 1.6729323308270678,
661
+ "grad_norm": 1.3739241361618042,
662
+ "learning_rate": 9.349750021108212e-06,
663
+ "loss": 1.2031,
664
+ "step": 890
665
+ },
666
+ {
667
+ "epoch": 1.6917293233082706,
668
+ "grad_norm": 1.2784823179244995,
669
+ "learning_rate": 9.335058174477472e-06,
670
+ "loss": 1.2768,
671
+ "step": 900
672
+ },
673
+ {
674
+ "epoch": 1.7105263157894737,
675
+ "grad_norm": 1.3196611404418945,
676
+ "learning_rate": 9.320214013201079e-06,
677
+ "loss": 1.1899,
678
+ "step": 910
679
+ },
680
+ {
681
+ "epoch": 1.7293233082706767,
682
+ "grad_norm": 1.2893208265304565,
683
+ "learning_rate": 9.305218058836778e-06,
684
+ "loss": 1.2266,
685
+ "step": 920
686
+ },
687
+ {
688
+ "epoch": 1.7481203007518797,
689
+ "grad_norm": 1.5164341926574707,
690
+ "learning_rate": 9.290070838275649e-06,
691
+ "loss": 1.2872,
692
+ "step": 930
693
+ },
694
+ {
695
+ "epoch": 1.7669172932330826,
696
+ "grad_norm": 1.5947052240371704,
697
+ "learning_rate": 9.274772883723587e-06,
698
+ "loss": 1.2469,
699
+ "step": 940
700
+ },
701
+ {
702
+ "epoch": 1.7857142857142856,
703
+ "grad_norm": 1.4149237871170044,
704
+ "learning_rate": 9.259324732682615e-06,
705
+ "loss": 1.2872,
706
+ "step": 950
707
+ },
708
+ {
709
+ "epoch": 1.8045112781954886,
710
+ "grad_norm": 1.3440284729003906,
711
+ "learning_rate": 9.24372692793199e-06,
712
+ "loss": 1.2179,
713
+ "step": 960
714
+ },
715
+ {
716
+ "epoch": 1.8233082706766917,
717
+ "grad_norm": 1.2356294393539429,
718
+ "learning_rate": 9.22798001750913e-06,
719
+ "loss": 1.2497,
720
+ "step": 970
721
+ },
722
+ {
723
+ "epoch": 1.8421052631578947,
724
+ "grad_norm": 1.37796950340271,
725
+ "learning_rate": 9.21208455469037e-06,
726
+ "loss": 1.2534,
727
+ "step": 980
728
+ },
729
+ {
730
+ "epoch": 1.8609022556390977,
731
+ "grad_norm": 1.5907108783721924,
732
+ "learning_rate": 9.196041097971509e-06,
733
+ "loss": 1.3171,
734
+ "step": 990
735
+ },
736
+ {
737
+ "epoch": 1.8796992481203008,
738
+ "grad_norm": 1.2945557832717896,
739
+ "learning_rate": 9.179850211048193e-06,
740
+ "loss": 1.307,
741
+ "step": 1000
742
+ },
743
+ {
744
+ "epoch": 1.8796992481203008,
745
+ "eval_loss": 1.2614006996154785,
746
+ "eval_runtime": 93.3681,
747
+ "eval_samples_per_second": 16.065,
748
+ "eval_steps_per_second": 1.007,
749
+ "step": 1000
750
+ },
751
+ {
752
+ "epoch": 1.8984962406015038,
753
+ "grad_norm": 1.3669792413711548,
754
+ "learning_rate": 9.163512462796113e-06,
755
+ "loss": 1.2613,
756
+ "step": 1010
757
+ },
758
+ {
759
+ "epoch": 1.9172932330827068,
760
+ "grad_norm": 1.444227695465088,
761
+ "learning_rate": 9.14702842725101e-06,
762
+ "loss": 1.242,
763
+ "step": 1020
764
+ },
765
+ {
766
+ "epoch": 1.9360902255639099,
767
+ "grad_norm": 1.4268559217453003,
768
+ "learning_rate": 9.13039868358851e-06,
769
+ "loss": 1.268,
770
+ "step": 1030
771
+ },
772
+ {
773
+ "epoch": 1.954887218045113,
774
+ "grad_norm": 1.410385012626648,
775
+ "learning_rate": 9.113623816103775e-06,
776
+ "loss": 1.2116,
777
+ "step": 1040
778
+ },
779
+ {
780
+ "epoch": 1.973684210526316,
781
+ "grad_norm": 1.4076857566833496,
782
+ "learning_rate": 9.09670441419097e-06,
783
+ "loss": 1.2582,
784
+ "step": 1050
785
+ },
786
+ {
787
+ "epoch": 1.9924812030075187,
788
+ "grad_norm": 1.5281563997268677,
789
+ "learning_rate": 9.079641072322555e-06,
790
+ "loss": 1.2643,
791
+ "step": 1060
792
+ },
793
+ {
794
+ "epoch": 2.011278195488722,
795
+ "grad_norm": 1.7657854557037354,
796
+ "learning_rate": 9.062434390028407e-06,
797
+ "loss": 1.2027,
798
+ "step": 1070
799
+ },
800
+ {
801
+ "epoch": 2.030075187969925,
802
+ "grad_norm": 1.4408607482910156,
803
+ "learning_rate": 9.045084971874738e-06,
804
+ "loss": 1.2506,
805
+ "step": 1080
806
+ },
807
+ {
808
+ "epoch": 2.0488721804511276,
809
+ "grad_norm": 1.6700369119644165,
810
+ "learning_rate": 9.027593427442867e-06,
811
+ "loss": 1.2196,
812
+ "step": 1090
813
+ },
814
+ {
815
+ "epoch": 2.0676691729323307,
816
+ "grad_norm": 1.6882362365722656,
817
+ "learning_rate": 9.009960371307798e-06,
818
+ "loss": 1.261,
819
+ "step": 1100
820
+ },
821
+ {
822
+ "epoch": 2.0864661654135337,
823
+ "grad_norm": 1.4808052778244019,
824
+ "learning_rate": 8.992186423016626e-06,
825
+ "loss": 1.2229,
826
+ "step": 1110
827
+ },
828
+ {
829
+ "epoch": 2.1052631578947367,
830
+ "grad_norm": 1.5231788158416748,
831
+ "learning_rate": 8.974272207066767e-06,
832
+ "loss": 1.1843,
833
+ "step": 1120
834
+ },
835
+ {
836
+ "epoch": 2.1240601503759398,
837
+ "grad_norm": 1.6113598346710205,
838
+ "learning_rate": 8.956218352884022e-06,
839
+ "loss": 1.2195,
840
+ "step": 1130
841
+ },
842
+ {
843
+ "epoch": 2.142857142857143,
844
+ "grad_norm": 1.7348911762237549,
845
+ "learning_rate": 8.938025494800454e-06,
846
+ "loss": 1.2789,
847
+ "step": 1140
848
+ },
849
+ {
850
+ "epoch": 2.161654135338346,
851
+ "grad_norm": 1.5242857933044434,
852
+ "learning_rate": 8.919694272032108e-06,
853
+ "loss": 1.1721,
854
+ "step": 1150
855
+ },
856
+ {
857
+ "epoch": 2.180451127819549,
858
+ "grad_norm": 1.6245213747024536,
859
+ "learning_rate": 8.901225328656543e-06,
860
+ "loss": 1.2334,
861
+ "step": 1160
862
+ },
863
+ {
864
+ "epoch": 2.199248120300752,
865
+ "grad_norm": 1.684967041015625,
866
+ "learning_rate": 8.882619313590212e-06,
867
+ "loss": 1.2439,
868
+ "step": 1170
869
+ },
870
+ {
871
+ "epoch": 2.218045112781955,
872
+ "grad_norm": 1.4683988094329834,
873
+ "learning_rate": 8.863876880565656e-06,
874
+ "loss": 1.1926,
875
+ "step": 1180
876
+ },
877
+ {
878
+ "epoch": 2.236842105263158,
879
+ "grad_norm": 1.6551039218902588,
880
+ "learning_rate": 8.844998688108535e-06,
881
+ "loss": 1.191,
882
+ "step": 1190
883
+ },
884
+ {
885
+ "epoch": 2.255639097744361,
886
+ "grad_norm": 1.6755002737045288,
887
+ "learning_rate": 8.825985399514488e-06,
888
+ "loss": 1.207,
889
+ "step": 1200
890
+ },
891
+ {
892
+ "epoch": 2.255639097744361,
893
+ "eval_loss": 1.2564018964767456,
894
+ "eval_runtime": 93.6498,
895
+ "eval_samples_per_second": 16.017,
896
+ "eval_steps_per_second": 1.004,
897
+ "step": 1200
898
+ },
899
+ {
900
+ "epoch": 2.274436090225564,
901
+ "grad_norm": 1.5405529737472534,
902
+ "learning_rate": 8.806837682825835e-06,
903
+ "loss": 1.2238,
904
+ "step": 1210
905
+ },
906
+ {
907
+ "epoch": 2.293233082706767,
908
+ "grad_norm": 1.57835853099823,
909
+ "learning_rate": 8.787556210808101e-06,
910
+ "loss": 1.1689,
911
+ "step": 1220
912
+ },
913
+ {
914
+ "epoch": 2.31203007518797,
915
+ "grad_norm": 1.5361424684524536,
916
+ "learning_rate": 8.768141660926375e-06,
917
+ "loss": 1.253,
918
+ "step": 1230
919
+ },
920
+ {
921
+ "epoch": 2.3308270676691727,
922
+ "grad_norm": 1.6792072057724,
923
+ "learning_rate": 8.748594715321512e-06,
924
+ "loss": 1.1906,
925
+ "step": 1240
926
+ },
927
+ {
928
+ "epoch": 2.3496240601503757,
929
+ "grad_norm": 1.6440975666046143,
930
+ "learning_rate": 8.728916060786162e-06,
931
+ "loss": 1.1598,
932
+ "step": 1250
933
+ },
934
+ {
935
+ "epoch": 2.3684210526315788,
936
+ "grad_norm": 1.7050859928131104,
937
+ "learning_rate": 8.70910638874064e-06,
938
+ "loss": 1.2064,
939
+ "step": 1260
940
+ },
941
+ {
942
+ "epoch": 2.387218045112782,
943
+ "grad_norm": 1.5904150009155273,
944
+ "learning_rate": 8.689166395208638e-06,
945
+ "loss": 1.2097,
946
+ "step": 1270
947
+ },
948
+ {
949
+ "epoch": 2.406015037593985,
950
+ "grad_norm": 1.8094373941421509,
951
+ "learning_rate": 8.669096780792754e-06,
952
+ "loss": 1.2016,
953
+ "step": 1280
954
+ },
955
+ {
956
+ "epoch": 2.424812030075188,
957
+ "grad_norm": 1.655340552330017,
958
+ "learning_rate": 8.6488982506499e-06,
959
+ "loss": 1.2202,
960
+ "step": 1290
961
+ },
962
+ {
963
+ "epoch": 2.443609022556391,
964
+ "grad_norm": 1.9319403171539307,
965
+ "learning_rate": 8.628571514466502e-06,
966
+ "loss": 1.2591,
967
+ "step": 1300
968
+ },
969
+ {
970
+ "epoch": 2.462406015037594,
971
+ "grad_norm": 1.9139586687088013,
972
+ "learning_rate": 8.608117286433583e-06,
973
+ "loss": 1.1778,
974
+ "step": 1310
975
+ },
976
+ {
977
+ "epoch": 2.481203007518797,
978
+ "grad_norm": 2.0142948627471924,
979
+ "learning_rate": 8.587536285221656e-06,
980
+ "loss": 1.1876,
981
+ "step": 1320
982
+ },
983
+ {
984
+ "epoch": 2.5,
985
+ "grad_norm": 1.6335065364837646,
986
+ "learning_rate": 8.566829233955484e-06,
987
+ "loss": 1.2413,
988
+ "step": 1330
989
+ },
990
+ {
991
+ "epoch": 2.518796992481203,
992
+ "grad_norm": 1.5473734140396118,
993
+ "learning_rate": 8.545996860188668e-06,
994
+ "loss": 1.2581,
995
+ "step": 1340
996
+ },
997
+ {
998
+ "epoch": 2.537593984962406,
999
+ "grad_norm": 1.8940939903259277,
1000
+ "learning_rate": 8.525039895878078e-06,
1001
+ "loss": 1.2328,
1002
+ "step": 1350
1003
+ },
1004
+ {
1005
+ "epoch": 2.556390977443609,
1006
+ "grad_norm": 2.160776376724243,
1007
+ "learning_rate": 8.503959077358143e-06,
1008
+ "loss": 1.2026,
1009
+ "step": 1360
1010
+ },
1011
+ {
1012
+ "epoch": 2.575187969924812,
1013
+ "grad_norm": 1.7434977293014526,
1014
+ "learning_rate": 8.482755145314987e-06,
1015
+ "loss": 1.2398,
1016
+ "step": 1370
1017
+ },
1018
+ {
1019
+ "epoch": 2.593984962406015,
1020
+ "grad_norm": 1.9678432941436768,
1021
+ "learning_rate": 8.46142884476038e-06,
1022
+ "loss": 1.2265,
1023
+ "step": 1380
1024
+ },
1025
+ {
1026
+ "epoch": 2.612781954887218,
1027
+ "grad_norm": 1.6937475204467773,
1028
+ "learning_rate": 8.439980925005587e-06,
1029
+ "loss": 1.1805,
1030
+ "step": 1390
1031
+ },
1032
+ {
1033
+ "epoch": 2.6315789473684212,
1034
+ "grad_norm": 1.9276158809661865,
1035
+ "learning_rate": 8.418412139635026e-06,
1036
+ "loss": 1.2654,
1037
+ "step": 1400
1038
+ },
1039
+ {
1040
+ "epoch": 2.6315789473684212,
1041
+ "eval_loss": 1.2535929679870605,
1042
+ "eval_runtime": 93.0776,
1043
+ "eval_samples_per_second": 16.116,
1044
+ "eval_steps_per_second": 1.01,
1045
+ "step": 1400
1046
+ },
1047
+ {
1048
+ "epoch": 2.6503759398496243,
1049
+ "grad_norm": 1.734552264213562,
1050
+ "learning_rate": 8.396723246479798e-06,
1051
+ "loss": 1.2129,
1052
+ "step": 1410
1053
+ },
1054
+ {
1055
+ "epoch": 2.6691729323308273,
1056
+ "grad_norm": 1.6371015310287476,
1057
+ "learning_rate": 8.374915007591053e-06,
1058
+ "loss": 1.1882,
1059
+ "step": 1420
1060
+ },
1061
+ {
1062
+ "epoch": 2.6879699248120303,
1063
+ "grad_norm": 1.7173954248428345,
1064
+ "learning_rate": 8.352988189213223e-06,
1065
+ "loss": 1.2261,
1066
+ "step": 1430
1067
+ },
1068
+ {
1069
+ "epoch": 2.706766917293233,
1070
+ "grad_norm": 1.769741177558899,
1071
+ "learning_rate": 8.330943561757092e-06,
1072
+ "loss": 1.2191,
1073
+ "step": 1440
1074
+ },
1075
+ {
1076
+ "epoch": 2.725563909774436,
1077
+ "grad_norm": 1.8008770942687988,
1078
+ "learning_rate": 8.308781899772731e-06,
1079
+ "loss": 1.2234,
1080
+ "step": 1450
1081
+ },
1082
+ {
1083
+ "epoch": 2.744360902255639,
1084
+ "grad_norm": 1.9747114181518555,
1085
+ "learning_rate": 8.286503981922284e-06,
1086
+ "loss": 1.2698,
1087
+ "step": 1460
1088
+ },
1089
+ {
1090
+ "epoch": 2.763157894736842,
1091
+ "grad_norm": 1.9045385122299194,
1092
+ "learning_rate": 8.264110590952609e-06,
1093
+ "loss": 1.2,
1094
+ "step": 1470
1095
+ },
1096
+ {
1097
+ "epoch": 2.781954887218045,
1098
+ "grad_norm": 1.8969700336456299,
1099
+ "learning_rate": 8.241602513667775e-06,
1100
+ "loss": 1.263,
1101
+ "step": 1480
1102
+ },
1103
+ {
1104
+ "epoch": 2.800751879699248,
1105
+ "grad_norm": 1.8622119426727295,
1106
+ "learning_rate": 8.218980540901417e-06,
1107
+ "loss": 1.2454,
1108
+ "step": 1490
1109
+ },
1110
+ {
1111
+ "epoch": 2.819548872180451,
1112
+ "grad_norm": 1.7723746299743652,
1113
+ "learning_rate": 8.19624546748895e-06,
1114
+ "loss": 1.2002,
1115
+ "step": 1500
1116
+ },
1117
+ {
1118
+ "epoch": 2.838345864661654,
1119
+ "grad_norm": 2.070909023284912,
1120
+ "learning_rate": 8.173398092239647e-06,
1121
+ "loss": 1.22,
1122
+ "step": 1510
1123
+ },
1124
+ {
1125
+ "epoch": 2.857142857142857,
1126
+ "grad_norm": 1.6946510076522827,
1127
+ "learning_rate": 8.150439217908557e-06,
1128
+ "loss": 1.1997,
1129
+ "step": 1520
1130
+ },
1131
+ {
1132
+ "epoch": 2.8759398496240602,
1133
+ "grad_norm": 1.7779756784439087,
1134
+ "learning_rate": 8.12736965116832e-06,
1135
+ "loss": 1.2291,
1136
+ "step": 1530
1137
+ },
1138
+ {
1139
+ "epoch": 2.8947368421052633,
1140
+ "grad_norm": 1.903606653213501,
1141
+ "learning_rate": 8.104190202580811e-06,
1142
+ "loss": 1.2645,
1143
+ "step": 1540
1144
+ },
1145
+ {
1146
+ "epoch": 2.9135338345864663,
1147
+ "grad_norm": 2.084599733352661,
1148
+ "learning_rate": 8.080901686568664e-06,
1149
+ "loss": 1.212,
1150
+ "step": 1550
1151
+ },
1152
+ {
1153
+ "epoch": 2.932330827067669,
1154
+ "grad_norm": 1.8313549757003784,
1155
+ "learning_rate": 8.057504921386661e-06,
1156
+ "loss": 1.1731,
1157
+ "step": 1560
1158
+ },
1159
+ {
1160
+ "epoch": 2.951127819548872,
1161
+ "grad_norm": 2.00838565826416,
1162
+ "learning_rate": 8.034000729092967e-06,
1163
+ "loss": 1.2283,
1164
+ "step": 1570
1165
+ },
1166
+ {
1167
+ "epoch": 2.969924812030075,
1168
+ "grad_norm": 1.9576573371887207,
1169
+ "learning_rate": 8.010389935520269e-06,
1170
+ "loss": 1.2285,
1171
+ "step": 1580
1172
+ },
1173
+ {
1174
+ "epoch": 2.988721804511278,
1175
+ "grad_norm": 1.8835121393203735,
1176
+ "learning_rate": 7.986673370246743e-06,
1177
+ "loss": 1.2275,
1178
+ "step": 1590
1179
+ },
1180
+ {
1181
+ "epoch": 3.007518796992481,
1182
+ "grad_norm": 1.9074680805206299,
1183
+ "learning_rate": 7.962851866566912e-06,
1184
+ "loss": 1.1963,
1185
+ "step": 1600
1186
+ },
1187
+ {
1188
+ "epoch": 3.007518796992481,
1189
+ "eval_loss": 1.2510054111480713,
1190
+ "eval_runtime": 93.3587,
1191
+ "eval_samples_per_second": 16.067,
1192
+ "eval_steps_per_second": 1.007,
1193
+ "step": 1600
1194
+ },
1195
+ {
1196
+ "epoch": 3.026315789473684,
1197
+ "grad_norm": 2.2151854038238525,
1198
+ "learning_rate": 7.938926261462366e-06,
1199
+ "loss": 1.1984,
1200
+ "step": 1610
1201
+ },
1202
+ {
1203
+ "epoch": 3.045112781954887,
1204
+ "grad_norm": 1.9173067808151245,
1205
+ "learning_rate": 7.914897395572362e-06,
1206
+ "loss": 1.183,
1207
+ "step": 1620
1208
+ },
1209
+ {
1210
+ "epoch": 3.06390977443609,
1211
+ "grad_norm": 2.0871455669403076,
1212
+ "learning_rate": 7.890766113164272e-06,
1213
+ "loss": 1.1625,
1214
+ "step": 1630
1215
+ },
1216
+ {
1217
+ "epoch": 3.082706766917293,
1218
+ "grad_norm": 1.9233837127685547,
1219
+ "learning_rate": 7.866533262103937e-06,
1220
+ "loss": 1.2114,
1221
+ "step": 1640
1222
+ },
1223
+ {
1224
+ "epoch": 3.101503759398496,
1225
+ "grad_norm": 1.902996301651001,
1226
+ "learning_rate": 7.842199693825863e-06,
1227
+ "loss": 1.1771,
1228
+ "step": 1650
1229
+ },
1230
+ {
1231
+ "epoch": 3.1203007518796992,
1232
+ "grad_norm": 1.9054787158966064,
1233
+ "learning_rate": 7.817766263303312e-06,
1234
+ "loss": 1.1924,
1235
+ "step": 1660
1236
+ },
1237
+ {
1238
+ "epoch": 3.1390977443609023,
1239
+ "grad_norm": 2.275200128555298,
1240
+ "learning_rate": 7.793233829018263e-06,
1241
+ "loss": 1.1873,
1242
+ "step": 1670
1243
+ },
1244
+ {
1245
+ "epoch": 3.1578947368421053,
1246
+ "grad_norm": 2.091639995574951,
1247
+ "learning_rate": 7.768603252931243e-06,
1248
+ "loss": 1.1504,
1249
+ "step": 1680
1250
+ },
1251
+ {
1252
+ "epoch": 3.1766917293233083,
1253
+ "grad_norm": 2.052783250808716,
1254
+ "learning_rate": 7.743875400451047e-06,
1255
+ "loss": 1.1758,
1256
+ "step": 1690
1257
+ },
1258
+ {
1259
+ "epoch": 3.1954887218045114,
1260
+ "grad_norm": 1.9168238639831543,
1261
+ "learning_rate": 7.719051140404327e-06,
1262
+ "loss": 1.1254,
1263
+ "step": 1700
1264
+ },
1265
+ {
1266
+ "epoch": 3.2142857142857144,
1267
+ "grad_norm": 2.0846714973449707,
1268
+ "learning_rate": 7.69413134500507e-06,
1269
+ "loss": 1.1965,
1270
+ "step": 1710
1271
+ },
1272
+ {
1273
+ "epoch": 3.2330827067669174,
1274
+ "grad_norm": 2.0492520332336426,
1275
+ "learning_rate": 7.669116889823955e-06,
1276
+ "loss": 1.1811,
1277
+ "step": 1720
1278
+ },
1279
+ {
1280
+ "epoch": 3.2518796992481205,
1281
+ "grad_norm": 1.936445713043213,
1282
+ "learning_rate": 7.644008653757571e-06,
1283
+ "loss": 1.1418,
1284
+ "step": 1730
1285
+ },
1286
+ {
1287
+ "epoch": 3.2706766917293235,
1288
+ "grad_norm": 2.3106260299682617,
1289
+ "learning_rate": 7.6188075189975644e-06,
1290
+ "loss": 1.1522,
1291
+ "step": 1740
1292
+ },
1293
+ {
1294
+ "epoch": 3.2894736842105265,
1295
+ "grad_norm": 2.2706220149993896,
1296
+ "learning_rate": 7.593514370999617e-06,
1297
+ "loss": 1.1975,
1298
+ "step": 1750
1299
+ },
1300
+ {
1301
+ "epoch": 3.308270676691729,
1302
+ "grad_norm": 2.2965404987335205,
1303
+ "learning_rate": 7.568130098452352e-06,
1304
+ "loss": 1.198,
1305
+ "step": 1760
1306
+ },
1307
+ {
1308
+ "epoch": 3.327067669172932,
1309
+ "grad_norm": 2.0302767753601074,
1310
+ "learning_rate": 7.542655593246103e-06,
1311
+ "loss": 1.1596,
1312
+ "step": 1770
1313
+ },
1314
+ {
1315
+ "epoch": 3.345864661654135,
1316
+ "grad_norm": 2.231513500213623,
1317
+ "learning_rate": 7.517091750441576e-06,
1318
+ "loss": 1.2303,
1319
+ "step": 1780
1320
+ },
1321
+ {
1322
+ "epoch": 3.3646616541353382,
1323
+ "grad_norm": 2.229316234588623,
1324
+ "learning_rate": 7.491439468238404e-06,
1325
+ "loss": 1.1841,
1326
+ "step": 1790
1327
+ },
1328
+ {
1329
+ "epoch": 3.3834586466165413,
1330
+ "grad_norm": 2.4000582695007324,
1331
+ "learning_rate": 7.465699647943586e-06,
1332
+ "loss": 1.164,
1333
+ "step": 1800
1334
+ },
1335
+ {
1336
+ "epoch": 3.3834586466165413,
1337
+ "eval_loss": 1.2509571313858032,
1338
+ "eval_runtime": 93.3319,
1339
+ "eval_samples_per_second": 16.072,
1340
+ "eval_steps_per_second": 1.007,
1341
+ "step": 1800
1342
+ },
1343
+ {
1344
+ "epoch": 3.4022556390977443,
1345
+ "grad_norm": 2.259418249130249,
1346
+ "learning_rate": 7.43987319393982e-06,
1347
+ "loss": 1.1798,
1348
+ "step": 1810
1349
+ },
1350
+ {
1351
+ "epoch": 3.4210526315789473,
1352
+ "grad_norm": 2.244920253753662,
1353
+ "learning_rate": 7.413961013653725e-06,
1354
+ "loss": 1.1729,
1355
+ "step": 1820
1356
+ },
1357
+ {
1358
+ "epoch": 3.4398496240601504,
1359
+ "grad_norm": 2.3745522499084473,
1360
+ "learning_rate": 7.387964017523964e-06,
1361
+ "loss": 1.2036,
1362
+ "step": 1830
1363
+ },
1364
+ {
1365
+ "epoch": 3.4586466165413534,
1366
+ "grad_norm": 2.5325570106506348,
1367
+ "learning_rate": 7.361883118969248e-06,
1368
+ "loss": 1.2009,
1369
+ "step": 1840
1370
+ },
1371
+ {
1372
+ "epoch": 3.4774436090225564,
1373
+ "grad_norm": 2.2491343021392822,
1374
+ "learning_rate": 7.335719234356245e-06,
1375
+ "loss": 1.1395,
1376
+ "step": 1850
1377
+ },
1378
+ {
1379
+ "epoch": 3.4962406015037595,
1380
+ "grad_norm": 2.3288066387176514,
1381
+ "learning_rate": 7.309473282967387e-06,
1382
+ "loss": 1.1893,
1383
+ "step": 1860
1384
+ },
1385
+ {
1386
+ "epoch": 3.5150375939849625,
1387
+ "grad_norm": 2.2251009941101074,
1388
+ "learning_rate": 7.283146186968566e-06,
1389
+ "loss": 1.1934,
1390
+ "step": 1870
1391
+ },
1392
+ {
1393
+ "epoch": 3.5338345864661656,
1394
+ "grad_norm": 2.130612850189209,
1395
+ "learning_rate": 7.256738871376733e-06,
1396
+ "loss": 1.1819,
1397
+ "step": 1880
1398
+ },
1399
+ {
1400
+ "epoch": 3.5526315789473686,
1401
+ "grad_norm": 2.1639811992645264,
1402
+ "learning_rate": 7.230252264027398e-06,
1403
+ "loss": 1.2371,
1404
+ "step": 1890
1405
+ },
1406
+ {
1407
+ "epoch": 3.571428571428571,
1408
+ "grad_norm": 2.719027042388916,
1409
+ "learning_rate": 7.203687295542032e-06,
1410
+ "loss": 1.227,
1411
+ "step": 1900
1412
+ },
1413
+ {
1414
+ "epoch": 3.590225563909774,
1415
+ "grad_norm": 2.3088018894195557,
1416
+ "learning_rate": 7.1770448992953676e-06,
1417
+ "loss": 1.1845,
1418
+ "step": 1910
1419
+ },
1420
+ {
1421
+ "epoch": 3.6090225563909772,
1422
+ "grad_norm": 2.2539610862731934,
1423
+ "learning_rate": 7.1503260113826035e-06,
1424
+ "loss": 1.1571,
1425
+ "step": 1920
1426
+ },
1427
+ {
1428
+ "epoch": 3.6278195488721803,
1429
+ "grad_norm": 2.1698110103607178,
1430
+ "learning_rate": 7.123531570586515e-06,
1431
+ "loss": 1.2118,
1432
+ "step": 1930
1433
+ },
1434
+ {
1435
+ "epoch": 3.6466165413533833,
1436
+ "grad_norm": 2.3570926189422607,
1437
+ "learning_rate": 7.09666251834447e-06,
1438
+ "loss": 1.2038,
1439
+ "step": 1940
1440
+ },
1441
+ {
1442
+ "epoch": 3.6654135338345863,
1443
+ "grad_norm": 2.3301756381988525,
1444
+ "learning_rate": 7.069719798715347e-06,
1445
+ "loss": 1.1971,
1446
+ "step": 1950
1447
+ },
1448
+ {
1449
+ "epoch": 3.6842105263157894,
1450
+ "grad_norm": 2.074479818344116,
1451
+ "learning_rate": 7.042704358346375e-06,
1452
+ "loss": 1.1426,
1453
+ "step": 1960
1454
+ },
1455
+ {
1456
+ "epoch": 3.7030075187969924,
1457
+ "grad_norm": 2.362988233566284,
1458
+ "learning_rate": 7.015617146439863e-06,
1459
+ "loss": 1.1678,
1460
+ "step": 1970
1461
+ },
1462
+ {
1463
+ "epoch": 3.7218045112781954,
1464
+ "grad_norm": 2.3498263359069824,
1465
+ "learning_rate": 6.988459114719849e-06,
1466
+ "loss": 1.1454,
1467
+ "step": 1980
1468
+ },
1469
+ {
1470
+ "epoch": 3.7406015037593985,
1471
+ "grad_norm": 2.538174629211426,
1472
+ "learning_rate": 6.9612312173986675e-06,
1473
+ "loss": 1.2414,
1474
+ "step": 1990
1475
+ },
1476
+ {
1477
+ "epoch": 3.7593984962406015,
1478
+ "grad_norm": 2.3670663833618164,
1479
+ "learning_rate": 6.933934411143419e-06,
1480
+ "loss": 1.2174,
1481
+ "step": 2000
1482
+ },
1483
+ {
1484
+ "epoch": 3.7593984962406015,
1485
+ "eval_loss": 1.2493139505386353,
1486
+ "eval_runtime": 93.3718,
1487
+ "eval_samples_per_second": 16.065,
1488
+ "eval_steps_per_second": 1.007,
1489
+ "step": 2000
1490
+ },
1491
+ {
1492
+ "epoch": 3.7781954887218046,
1493
+ "grad_norm": 2.3209378719329834,
1494
+ "learning_rate": 6.906569655042357e-06,
1495
+ "loss": 1.1772,
1496
+ "step": 2010
1497
+ },
1498
+ {
1499
+ "epoch": 3.7969924812030076,
1500
+ "grad_norm": 2.449416399002075,
1501
+ "learning_rate": 6.879137910571191e-06,
1502
+ "loss": 1.1805,
1503
+ "step": 2020
1504
+ },
1505
+ {
1506
+ "epoch": 3.8157894736842106,
1507
+ "grad_norm": 2.377382278442383,
1508
+ "learning_rate": 6.8516401415593005e-06,
1509
+ "loss": 1.2065,
1510
+ "step": 2030
1511
+ },
1512
+ {
1513
+ "epoch": 3.8345864661654137,
1514
+ "grad_norm": 2.3601431846618652,
1515
+ "learning_rate": 6.824077314155877e-06,
1516
+ "loss": 1.157,
1517
+ "step": 2040
1518
+ },
1519
+ {
1520
+ "epoch": 3.8533834586466167,
1521
+ "grad_norm": 2.5709738731384277,
1522
+ "learning_rate": 6.7964503967959705e-06,
1523
+ "loss": 1.1635,
1524
+ "step": 2050
1525
+ },
1526
+ {
1527
+ "epoch": 3.8721804511278197,
1528
+ "grad_norm": 2.3104586601257324,
1529
+ "learning_rate": 6.768760360166471e-06,
1530
+ "loss": 1.1307,
1531
+ "step": 2060
1532
+ },
1533
+ {
1534
+ "epoch": 3.8909774436090228,
1535
+ "grad_norm": 2.6564462184906006,
1536
+ "learning_rate": 6.741008177171995e-06,
1537
+ "loss": 1.1858,
1538
+ "step": 2070
1539
+ },
1540
+ {
1541
+ "epoch": 3.909774436090226,
1542
+ "grad_norm": 2.6480863094329834,
1543
+ "learning_rate": 6.713194822900707e-06,
1544
+ "loss": 1.1718,
1545
+ "step": 2080
1546
+ },
1547
+ {
1548
+ "epoch": 3.928571428571429,
1549
+ "grad_norm": 2.4512076377868652,
1550
+ "learning_rate": 6.6853212745900585e-06,
1551
+ "loss": 1.1452,
1552
+ "step": 2090
1553
+ },
1554
+ {
1555
+ "epoch": 3.9473684210526314,
1556
+ "grad_norm": 2.5932700634002686,
1557
+ "learning_rate": 6.657388511592453e-06,
1558
+ "loss": 1.1107,
1559
+ "step": 2100
1560
+ },
1561
+ {
1562
+ "epoch": 3.9661654135338344,
1563
+ "grad_norm": 2.5317165851593018,
1564
+ "learning_rate": 6.62939751534083e-06,
1565
+ "loss": 1.1833,
1566
+ "step": 2110
1567
+ },
1568
+ {
1569
+ "epoch": 3.9849624060150375,
1570
+ "grad_norm": 2.370185375213623,
1571
+ "learning_rate": 6.601349269314188e-06,
1572
+ "loss": 1.1474,
1573
+ "step": 2120
1574
+ },
1575
+ {
1576
+ "epoch": 4.003759398496241,
1577
+ "grad_norm": 2.653226613998413,
1578
+ "learning_rate": 6.573244759003033e-06,
1579
+ "loss": 1.1439,
1580
+ "step": 2130
1581
+ },
1582
+ {
1583
+ "epoch": 4.022556390977444,
1584
+ "grad_norm": 2.3954885005950928,
1585
+ "learning_rate": 6.545084971874738e-06,
1586
+ "loss": 1.1256,
1587
+ "step": 2140
1588
+ },
1589
+ {
1590
+ "epoch": 4.041353383458647,
1591
+ "grad_norm": 2.662186622619629,
1592
+ "learning_rate": 6.516870897338864e-06,
1593
+ "loss": 1.1084,
1594
+ "step": 2150
1595
+ },
1596
+ {
1597
+ "epoch": 4.06015037593985,
1598
+ "grad_norm": 2.688732385635376,
1599
+ "learning_rate": 6.488603526712391e-06,
1600
+ "loss": 1.1515,
1601
+ "step": 2160
1602
+ },
1603
+ {
1604
+ "epoch": 4.078947368421052,
1605
+ "grad_norm": 2.86458420753479,
1606
+ "learning_rate": 6.46028385318488e-06,
1607
+ "loss": 1.1498,
1608
+ "step": 2170
1609
+ },
1610
+ {
1611
+ "epoch": 4.097744360902255,
1612
+ "grad_norm": 2.4699411392211914,
1613
+ "learning_rate": 6.431912871783587e-06,
1614
+ "loss": 1.1487,
1615
+ "step": 2180
1616
+ },
1617
+ {
1618
+ "epoch": 4.116541353383458,
1619
+ "grad_norm": 2.6786465644836426,
1620
+ "learning_rate": 6.4034915793385e-06,
1621
+ "loss": 1.1616,
1622
+ "step": 2190
1623
+ },
1624
+ {
1625
+ "epoch": 4.135338345864661,
1626
+ "grad_norm": 2.7973482608795166,
1627
+ "learning_rate": 6.3750209744473105e-06,
1628
+ "loss": 1.1286,
1629
+ "step": 2200
1630
+ },
1631
+ {
1632
+ "epoch": 4.135338345864661,
1633
+ "eval_loss": 1.2525601387023926,
1634
+ "eval_runtime": 93.0369,
1635
+ "eval_samples_per_second": 16.123,
1636
+ "eval_steps_per_second": 1.01,
1637
+ "step": 2200
1638
+ },
1639
+ {
1640
+ "epoch": 4.154135338345864,
1641
+ "grad_norm": 2.7302699089050293,
1642
+ "learning_rate": 6.346502057440327e-06,
1643
+ "loss": 1.1263,
1644
+ "step": 2210
1645
+ },
1646
+ {
1647
+ "epoch": 4.172932330827067,
1648
+ "grad_norm": 2.5245988368988037,
1649
+ "learning_rate": 6.3179358303453386e-06,
1650
+ "loss": 1.0954,
1651
+ "step": 2220
1652
+ },
1653
+ {
1654
+ "epoch": 4.19172932330827,
1655
+ "grad_norm": 2.456031322479248,
1656
+ "learning_rate": 6.289323296852393e-06,
1657
+ "loss": 1.0753,
1658
+ "step": 2230
1659
+ },
1660
+ {
1661
+ "epoch": 4.2105263157894735,
1662
+ "grad_norm": 2.6923177242279053,
1663
+ "learning_rate": 6.260665462278544e-06,
1664
+ "loss": 1.1932,
1665
+ "step": 2240
1666
+ },
1667
+ {
1668
+ "epoch": 4.2293233082706765,
1669
+ "grad_norm": 3.076815605163574,
1670
+ "learning_rate": 6.231963333532516e-06,
1671
+ "loss": 1.1761,
1672
+ "step": 2250
1673
+ },
1674
+ {
1675
+ "epoch": 4.2481203007518795,
1676
+ "grad_norm": 2.7684240341186523,
1677
+ "learning_rate": 6.203217919079343e-06,
1678
+ "loss": 1.1748,
1679
+ "step": 2260
1680
+ },
1681
+ {
1682
+ "epoch": 4.2669172932330826,
1683
+ "grad_norm": 2.6578075885772705,
1684
+ "learning_rate": 6.17443022890492e-06,
1685
+ "loss": 1.1325,
1686
+ "step": 2270
1687
+ },
1688
+ {
1689
+ "epoch": 4.285714285714286,
1690
+ "grad_norm": 2.995903491973877,
1691
+ "learning_rate": 6.145601274480521e-06,
1692
+ "loss": 1.1644,
1693
+ "step": 2280
1694
+ },
1695
+ {
1696
+ "epoch": 4.304511278195489,
1697
+ "grad_norm": 2.9120066165924072,
1698
+ "learning_rate": 6.116732068727271e-06,
1699
+ "loss": 1.2016,
1700
+ "step": 2290
1701
+ },
1702
+ {
1703
+ "epoch": 4.323308270676692,
1704
+ "grad_norm": 3.0313379764556885,
1705
+ "learning_rate": 6.08782362598054e-06,
1706
+ "loss": 1.1828,
1707
+ "step": 2300
1708
+ },
1709
+ {
1710
+ "epoch": 4.342105263157895,
1711
+ "grad_norm": 3.1084587574005127,
1712
+ "learning_rate": 6.058876961954308e-06,
1713
+ "loss": 1.1837,
1714
+ "step": 2310
1715
+ },
1716
+ {
1717
+ "epoch": 4.360902255639098,
1718
+ "grad_norm": 3.0962178707122803,
1719
+ "learning_rate": 6.029893093705492e-06,
1720
+ "loss": 1.1689,
1721
+ "step": 2320
1722
+ },
1723
+ {
1724
+ "epoch": 4.379699248120301,
1725
+ "grad_norm": 2.77724027633667,
1726
+ "learning_rate": 6.0008730395981905e-06,
1727
+ "loss": 1.1043,
1728
+ "step": 2330
1729
+ },
1730
+ {
1731
+ "epoch": 4.398496240601504,
1732
+ "grad_norm": 2.8677093982696533,
1733
+ "learning_rate": 5.971817819267914e-06,
1734
+ "loss": 1.162,
1735
+ "step": 2340
1736
+ },
1737
+ {
1738
+ "epoch": 4.417293233082707,
1739
+ "grad_norm": 3.0676045417785645,
1740
+ "learning_rate": 5.9427284535857585e-06,
1741
+ "loss": 1.1986,
1742
+ "step": 2350
1743
+ },
1744
+ {
1745
+ "epoch": 4.43609022556391,
1746
+ "grad_norm": 2.999682664871216,
1747
+ "learning_rate": 5.9136059646225375e-06,
1748
+ "loss": 1.1752,
1749
+ "step": 2360
1750
+ },
1751
+ {
1752
+ "epoch": 4.454887218045113,
1753
+ "grad_norm": 2.8018548488616943,
1754
+ "learning_rate": 5.884451375612865e-06,
1755
+ "loss": 1.1221,
1756
+ "step": 2370
1757
+ },
1758
+ {
1759
+ "epoch": 4.473684210526316,
1760
+ "grad_norm": 3.1642117500305176,
1761
+ "learning_rate": 5.855265710919211e-06,
1762
+ "loss": 1.0907,
1763
+ "step": 2380
1764
+ },
1765
+ {
1766
+ "epoch": 4.492481203007519,
1767
+ "grad_norm": 2.7529778480529785,
1768
+ "learning_rate": 5.826049995995905e-06,
1769
+ "loss": 1.1429,
1770
+ "step": 2390
1771
+ },
1772
+ {
1773
+ "epoch": 4.511278195488722,
1774
+ "grad_norm": 2.7263307571411133,
1775
+ "learning_rate": 5.796805257353109e-06,
1776
+ "loss": 1.1467,
1777
+ "step": 2400
1778
+ },
1779
+ {
1780
+ "epoch": 4.511278195488722,
1781
+ "eval_loss": 1.255226731300354,
1782
+ "eval_runtime": 93.8019,
1783
+ "eval_samples_per_second": 15.991,
1784
+ "eval_steps_per_second": 1.002,
1785
+ "step": 2400
1786
+ },
1787
+ {
1788
+ "epoch": 4.530075187969925,
1789
+ "grad_norm": 2.7593045234680176,
1790
+ "learning_rate": 5.767532522520746e-06,
1791
+ "loss": 1.0982,
1792
+ "step": 2410
1793
+ },
1794
+ {
1795
+ "epoch": 4.548872180451128,
1796
+ "grad_norm": 2.8725757598876953,
1797
+ "learning_rate": 5.738232820012407e-06,
1798
+ "loss": 1.1655,
1799
+ "step": 2420
1800
+ },
1801
+ {
1802
+ "epoch": 4.567669172932331,
1803
+ "grad_norm": 2.7557260990142822,
1804
+ "learning_rate": 5.7089071792892e-06,
1805
+ "loss": 1.1364,
1806
+ "step": 2430
1807
+ },
1808
+ {
1809
+ "epoch": 4.586466165413534,
1810
+ "grad_norm": 3.0971968173980713,
1811
+ "learning_rate": 5.679556630723592e-06,
1812
+ "loss": 1.0997,
1813
+ "step": 2440
1814
+ },
1815
+ {
1816
+ "epoch": 4.605263157894737,
1817
+ "grad_norm": 2.843336343765259,
1818
+ "learning_rate": 5.6501822055631976e-06,
1819
+ "loss": 1.0726,
1820
+ "step": 2450
1821
+ },
1822
+ {
1823
+ "epoch": 4.62406015037594,
1824
+ "grad_norm": 3.010181188583374,
1825
+ "learning_rate": 5.620784935894548e-06,
1826
+ "loss": 1.0923,
1827
+ "step": 2460
1828
+ },
1829
+ {
1830
+ "epoch": 4.642857142857143,
1831
+ "grad_norm": 3.034456491470337,
1832
+ "learning_rate": 5.591365854606829e-06,
1833
+ "loss": 1.1551,
1834
+ "step": 2470
1835
+ },
1836
+ {
1837
+ "epoch": 4.661654135338345,
1838
+ "grad_norm": 2.9387736320495605,
1839
+ "learning_rate": 5.561925995355595e-06,
1840
+ "loss": 1.1109,
1841
+ "step": 2480
1842
+ },
1843
+ {
1844
+ "epoch": 4.680451127819548,
1845
+ "grad_norm": 2.930490493774414,
1846
+ "learning_rate": 5.532466392526439e-06,
1847
+ "loss": 1.0835,
1848
+ "step": 2490
1849
+ },
1850
+ {
1851
+ "epoch": 4.6992481203007515,
1852
+ "grad_norm": 2.8749279975891113,
1853
+ "learning_rate": 5.5029880811986546e-06,
1854
+ "loss": 1.109,
1855
+ "step": 2500
1856
+ },
1857
+ {
1858
+ "epoch": 4.7180451127819545,
1859
+ "grad_norm": 3.019275426864624,
1860
+ "learning_rate": 5.4734920971088766e-06,
1861
+ "loss": 1.1409,
1862
+ "step": 2510
1863
+ },
1864
+ {
1865
+ "epoch": 4.7368421052631575,
1866
+ "grad_norm": 3.259063959121704,
1867
+ "learning_rate": 5.443979476614674e-06,
1868
+ "loss": 1.171,
1869
+ "step": 2520
1870
+ },
1871
+ {
1872
+ "epoch": 4.7556390977443606,
1873
+ "grad_norm": 3.019524574279785,
1874
+ "learning_rate": 5.4144512566581495e-06,
1875
+ "loss": 1.1815,
1876
+ "step": 2530
1877
+ },
1878
+ {
1879
+ "epoch": 4.774436090225564,
1880
+ "grad_norm": 3.458449125289917,
1881
+ "learning_rate": 5.384908474729501e-06,
1882
+ "loss": 1.1535,
1883
+ "step": 2540
1884
+ },
1885
+ {
1886
+ "epoch": 4.793233082706767,
1887
+ "grad_norm": 2.9792213439941406,
1888
+ "learning_rate": 5.3553521688305655e-06,
1889
+ "loss": 1.1426,
1890
+ "step": 2550
1891
+ },
1892
+ {
1893
+ "epoch": 4.81203007518797,
1894
+ "grad_norm": 3.1435463428497314,
1895
+ "learning_rate": 5.325783377438357e-06,
1896
+ "loss": 1.1248,
1897
+ "step": 2560
1898
+ },
1899
+ {
1900
+ "epoch": 4.830827067669173,
1901
+ "grad_norm": 3.020418167114258,
1902
+ "learning_rate": 5.296203139468572e-06,
1903
+ "loss": 1.079,
1904
+ "step": 2570
1905
+ },
1906
+ {
1907
+ "epoch": 4.849624060150376,
1908
+ "grad_norm": 3.183685779571533,
1909
+ "learning_rate": 5.266612494239088e-06,
1910
+ "loss": 1.0826,
1911
+ "step": 2580
1912
+ },
1913
+ {
1914
+ "epoch": 4.868421052631579,
1915
+ "grad_norm": 3.304868698120117,
1916
+ "learning_rate": 5.23701248143345e-06,
1917
+ "loss": 1.1501,
1918
+ "step": 2590
1919
+ },
1920
+ {
1921
+ "epoch": 4.887218045112782,
1922
+ "grad_norm": 3.051421642303467,
1923
+ "learning_rate": 5.207404141064334e-06,
1924
+ "loss": 1.1357,
1925
+ "step": 2600
1926
+ },
1927
+ {
1928
+ "epoch": 4.887218045112782,
1929
+ "eval_loss": 1.2556278705596924,
1930
+ "eval_runtime": 93.0175,
1931
+ "eval_samples_per_second": 16.126,
1932
+ "eval_steps_per_second": 1.011,
1933
+ "step": 2600
1934
+ },
1935
+ {
1936
+ "epoch": 4.906015037593985,
1937
+ "grad_norm": 3.5848500728607178,
1938
+ "learning_rate": 5.177788513437013e-06,
1939
+ "loss": 1.1404,
1940
+ "step": 2610
1941
+ },
1942
+ {
1943
+ "epoch": 4.924812030075188,
1944
+ "grad_norm": 2.9986302852630615,
1945
+ "learning_rate": 5.148166639112799e-06,
1946
+ "loss": 1.141,
1947
+ "step": 2620
1948
+ },
1949
+ {
1950
+ "epoch": 4.943609022556391,
1951
+ "grad_norm": 3.145249366760254,
1952
+ "learning_rate": 5.118539558872489e-06,
1953
+ "loss": 1.1371,
1954
+ "step": 2630
1955
+ },
1956
+ {
1957
+ "epoch": 4.962406015037594,
1958
+ "grad_norm": 3.0202174186706543,
1959
+ "learning_rate": 5.088908313679788e-06,
1960
+ "loss": 1.1056,
1961
+ "step": 2640
1962
+ },
1963
+ {
1964
+ "epoch": 4.981203007518797,
1965
+ "grad_norm": 3.3212361335754395,
1966
+ "learning_rate": 5.059273944644742e-06,
1967
+ "loss": 1.1125,
1968
+ "step": 2650
1969
+ },
1970
+ {
1971
+ "epoch": 5.0,
1972
+ "grad_norm": 2.910068988800049,
1973
+ "learning_rate": 5.029637492987153e-06,
1974
+ "loss": 1.1029,
1975
+ "step": 2660
1976
+ },
1977
+ {
1978
+ "epoch": 5.018796992481203,
1979
+ "grad_norm": 2.9455108642578125,
1980
+ "learning_rate": 5e-06,
1981
+ "loss": 1.1127,
1982
+ "step": 2670
1983
+ },
1984
+ {
1985
+ "epoch": 5.037593984962406,
1986
+ "grad_norm": 2.8705708980560303,
1987
+ "learning_rate": 4.970362507012848e-06,
1988
+ "loss": 1.1489,
1989
+ "step": 2680
1990
+ },
1991
+ {
1992
+ "epoch": 5.056390977443609,
1993
+ "grad_norm": 3.432981252670288,
1994
+ "learning_rate": 4.940726055355259e-06,
1995
+ "loss": 1.1115,
1996
+ "step": 2690
1997
+ },
1998
+ {
1999
+ "epoch": 5.075187969924812,
2000
+ "grad_norm": 3.39704966545105,
2001
+ "learning_rate": 4.911091686320213e-06,
2002
+ "loss": 1.1115,
2003
+ "step": 2700
2004
+ },
2005
+ {
2006
+ "epoch": 5.093984962406015,
2007
+ "grad_norm": 3.0686161518096924,
2008
+ "learning_rate": 4.881460441127513e-06,
2009
+ "loss": 1.0303,
2010
+ "step": 2710
2011
+ },
2012
+ {
2013
+ "epoch": 5.112781954887218,
2014
+ "grad_norm": 3.2145256996154785,
2015
+ "learning_rate": 4.8518333608872015e-06,
2016
+ "loss": 1.1397,
2017
+ "step": 2720
2018
+ },
2019
+ {
2020
+ "epoch": 5.131578947368421,
2021
+ "grad_norm": 3.2624154090881348,
2022
+ "learning_rate": 4.822211486562989e-06,
2023
+ "loss": 1.1045,
2024
+ "step": 2730
2025
+ },
2026
+ {
2027
+ "epoch": 5.150375939849624,
2028
+ "grad_norm": 3.310004711151123,
2029
+ "learning_rate": 4.792595858935668e-06,
2030
+ "loss": 1.12,
2031
+ "step": 2740
2032
+ },
2033
+ {
2034
+ "epoch": 5.169172932330827,
2035
+ "grad_norm": 3.4311578273773193,
2036
+ "learning_rate": 4.7629875185665505e-06,
2037
+ "loss": 1.0694,
2038
+ "step": 2750
2039
+ },
2040
+ {
2041
+ "epoch": 5.18796992481203,
2042
+ "grad_norm": 3.323531150817871,
2043
+ "learning_rate": 4.733387505760913e-06,
2044
+ "loss": 1.0619,
2045
+ "step": 2760
2046
+ },
2047
+ {
2048
+ "epoch": 5.206766917293233,
2049
+ "grad_norm": 3.4392173290252686,
2050
+ "learning_rate": 4.703796860531429e-06,
2051
+ "loss": 1.0368,
2052
+ "step": 2770
2053
+ },
2054
+ {
2055
+ "epoch": 5.225563909774436,
2056
+ "grad_norm": 3.3521625995635986,
2057
+ "learning_rate": 4.674216622561645e-06,
2058
+ "loss": 1.0728,
2059
+ "step": 2780
2060
+ },
2061
+ {
2062
+ "epoch": 5.2443609022556394,
2063
+ "grad_norm": 3.586398124694824,
2064
+ "learning_rate": 4.644647831169435e-06,
2065
+ "loss": 1.1404,
2066
+ "step": 2790
2067
+ },
2068
+ {
2069
+ "epoch": 5.2631578947368425,
2070
+ "grad_norm": 3.6765518188476562,
2071
+ "learning_rate": 4.6150915252705005e-06,
2072
+ "loss": 1.0742,
2073
+ "step": 2800
2074
+ },
2075
+ {
2076
+ "epoch": 5.2631578947368425,
2077
+ "eval_loss": 1.2630378007888794,
2078
+ "eval_runtime": 92.9444,
2079
+ "eval_samples_per_second": 16.139,
2080
+ "eval_steps_per_second": 1.011,
2081
+ "step": 2800
2082
+ },
2083
+ {
2084
+ "epoch": 5.2631578947368425,
2085
+ "step": 2800,
2086
+ "total_flos": 2.975888186909131e+18,
2087
+ "train_loss": 1.220632496220725,
2088
+ "train_runtime": 9849.9922,
2089
+ "train_samples_per_second": 8.629,
2090
+ "train_steps_per_second": 0.54
2091
+ }
2092
+ ],
2093
+ "logging_steps": 10,
2094
+ "max_steps": 5320,
2095
+ "num_input_tokens_seen": 0,
2096
+ "num_train_epochs": 10,
2097
+ "save_steps": 1000,
2098
+ "total_flos": 2.975888186909131e+18,
2099
+ "train_batch_size": 8,
2100
+ "trial_name": null,
2101
+ "trial_params": null
2102
+ }
llama3_8b_peft/cnn_dailymail/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:223d21875b305bea3b76a360a7f2a6373aea7495de907dabd28c7ea01428da17
3
+ size 5176
llama3_8b_peft/cnn_dailymail/training_eval_loss.png ADDED
llama3_8b_peft/cnn_dailymail/training_loss.png ADDED
llama3_8b_peft/goal_step_wikihow/README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: goal_step_wikihow_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # goal_step_wikihow_no_sys
18
+
19
+ This model is a fine-tuned version of the Llama3-8b base model (loaded from the local checkpoint path `/data1/model/llama3/unsloth/Llama3-8b`) on the goal_step_wikihow_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0130
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 5e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
+ - lr_scheduler_type: cosine
46
+ - lr_scheduler_warmup_steps: 20
47
+ - num_epochs: 5.0
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss |
52
+ |:-------------:|:------:|:----:|:---------------:|
53
+ | 0.0401 | 0.1493 | 100 | 0.0295 |
54
+ | 0.0476 | 0.2985 | 200 | 0.0140 |
55
+ | 0.0246 | 0.4478 | 300 | 0.0158 |
56
+ | 0.031 | 0.5970 | 400 | 0.0130 |
57
+ | 0.0254 | 0.7463 | 500 | 0.0167 |
58
+ | 0.0343 | 0.8955 | 600 | 0.0171 |
59
+ | 0.0057 | 1.0448 | 700 | 0.0130 |
60
+
61
+
62
+ ### Framework versions
63
+
64
+ - PEFT 0.10.0
65
+ - Transformers 4.40.0
66
+ - Pytorch 2.2.1
67
+ - Datasets 2.18.0
68
+ - Tokenizers 0.19.1
llama3_8b_peft/goal_step_wikihow/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "o_proj",
24
+ "gate_proj",
25
+ "down_proj",
26
+ "q_proj",
27
+ "k_proj",
28
+ "up_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/goal_step_wikihow/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8155d31e78a4aba71dcd13800ee3c6dc1af59e4d190292b630905b1d958fadf
3
+ size 83945296
llama3_8b_peft/goal_step_wikihow/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.044776119402985,
3
+ "eval_loss": 0.012991434894502163,
4
+ "eval_runtime": 3.2528,
5
+ "eval_samples_per_second": 87.001,
6
+ "eval_steps_per_second": 11.067,
7
+ "total_flos": 3.130965478814515e+16,
8
+ "train_loss": 0.060939116749380316,
9
+ "train_runtime": 244.1628,
10
+ "train_samples_per_second": 109.763,
11
+ "train_steps_per_second": 13.72
12
+ }
llama3_8b_peft/goal_step_wikihow/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.044776119402985,
3
+ "eval_loss": 0.012991434894502163,
4
+ "eval_runtime": 3.2528,
5
+ "eval_samples_per_second": 87.001,
6
+ "eval_steps_per_second": 11.067
7
+ }
llama3_8b_peft/goal_step_wikihow/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/goal_step_wikihow/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/goal_step_wikihow/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }
llama3_8b_peft/goal_step_wikihow/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.044776119402985,
3
+ "total_flos": 3.130965478814515e+16,
4
+ "train_loss": 0.060939116749380316,
5
+ "train_runtime": 244.1628,
6
+ "train_samples_per_second": 109.763,
7
+ "train_steps_per_second": 13.72
8
+ }
llama3_8b_peft/goal_step_wikihow/trainer_log.jsonl ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 3350, "loss": 1.7141, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5e-05, "epoch": 0.014925373134328358, "percentage": 0.3, "elapsed_time": "0:00:04", "remaining_time": "0:26:34"}
2
+ {"current_steps": 20, "total_steps": 3350, "loss": 0.7203, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-05, "epoch": 0.029850746268656716, "percentage": 0.6, "elapsed_time": "0:00:07", "remaining_time": "0:21:32"}
3
+ {"current_steps": 30, "total_steps": 3350, "loss": 0.0767, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.999888745376028e-05, "epoch": 0.04477611940298507, "percentage": 0.9, "elapsed_time": "0:00:11", "remaining_time": "0:20:24"}
4
+ {"current_steps": 40, "total_steps": 3350, "loss": 0.1053, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9995549914061836e-05, "epoch": 0.05970149253731343, "percentage": 1.19, "elapsed_time": "0:00:13", "remaining_time": "0:19:02"}
5
+ {"current_steps": 50, "total_steps": 3350, "loss": 0.0619, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.998998767795805e-05, "epoch": 0.07462686567164178, "percentage": 1.49, "elapsed_time": "0:00:16", "remaining_time": "0:18:27"}
6
+ {"current_steps": 60, "total_steps": 3350, "loss": 0.0472, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.99822012405085e-05, "epoch": 0.08955223880597014, "percentage": 1.79, "elapsed_time": "0:00:19", "remaining_time": "0:18:12"}
7
+ {"current_steps": 70, "total_steps": 3350, "loss": 0.0514, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.997219129473495e-05, "epoch": 0.1044776119402985, "percentage": 2.09, "elapsed_time": "0:00:23", "remaining_time": "0:18:24"}
8
+ {"current_steps": 80, "total_steps": 3350, "loss": 0.0441, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.995995873155958e-05, "epoch": 0.11940298507462686, "percentage": 2.39, "elapsed_time": "0:00:26", "remaining_time": "0:18:07"}
9
+ {"current_steps": 90, "total_steps": 3350, "loss": 0.0507, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.994550463972577e-05, "epoch": 0.13432835820895522, "percentage": 2.69, "elapsed_time": "0:00:29", "remaining_time": "0:17:46"}
10
+ {"current_steps": 100, "total_steps": 3350, "loss": 0.0401, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.992883030570116e-05, "epoch": 0.14925373134328357, "percentage": 2.99, "elapsed_time": "0:00:32", "remaining_time": "0:17:32"}
11
+ {"current_steps": 100, "total_steps": 3350, "loss": null, "eval_loss": 0.029515134170651436, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.14925373134328357, "percentage": 2.99, "elapsed_time": "0:00:32", "remaining_time": "0:17:32"}
12
+ {"current_steps": 110, "total_steps": 3350, "loss": 0.0491, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9909937213563165e-05, "epoch": 0.16417910447761194, "percentage": 3.28, "elapsed_time": "0:00:39", "remaining_time": "0:19:09"}
13
+ {"current_steps": 120, "total_steps": 3350, "loss": 0.0251, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.988882704486687e-05, "epoch": 0.1791044776119403, "percentage": 3.58, "elapsed_time": "0:00:41", "remaining_time": "0:18:44"}
14
+ {"current_steps": 130, "total_steps": 3350, "loss": 0.0366, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9865501678495375e-05, "epoch": 0.19402985074626866, "percentage": 3.88, "elapsed_time": "0:00:44", "remaining_time": "0:18:33"}
15
+ {"current_steps": 140, "total_steps": 3350, "loss": 0.0313, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9839963190492576e-05, "epoch": 0.208955223880597, "percentage": 4.18, "elapsed_time": "0:00:48", "remaining_time": "0:18:21"}
16
+ {"current_steps": 150, "total_steps": 3350, "loss": 0.037, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9812213853878376e-05, "epoch": 0.22388059701492538, "percentage": 4.48, "elapsed_time": "0:00:50", "remaining_time": "0:18:05"}
17
+ {"current_steps": 160, "total_steps": 3350, "loss": 0.0211, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.978225613844639e-05, "epoch": 0.23880597014925373, "percentage": 4.78, "elapsed_time": "0:00:53", "remaining_time": "0:17:55"}
18
+ {"current_steps": 170, "total_steps": 3350, "loss": 0.0346, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.975009271054409e-05, "epoch": 0.2537313432835821, "percentage": 5.07, "elapsed_time": "0:00:56", "remaining_time": "0:17:42"}
19
+ {"current_steps": 180, "total_steps": 3350, "loss": 0.0183, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.971572643283557e-05, "epoch": 0.26865671641791045, "percentage": 5.37, "elapsed_time": "0:00:59", "remaining_time": "0:17:32"}
20
+ {"current_steps": 190, "total_steps": 3350, "loss": 0.0244, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9679160364046644e-05, "epoch": 0.2835820895522388, "percentage": 5.67, "elapsed_time": "0:01:02", "remaining_time": "0:17:25"}
21
+ {"current_steps": 200, "total_steps": 3350, "loss": 0.0476, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9640397758692715e-05, "epoch": 0.29850746268656714, "percentage": 5.97, "elapsed_time": "0:01:05", "remaining_time": "0:17:14"}
22
+ {"current_steps": 200, "total_steps": 3350, "loss": null, "eval_loss": 0.014047912321984768, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.29850746268656714, "percentage": 5.97, "elapsed_time": "0:01:05", "remaining_time": "0:17:14"}
23
+ {"current_steps": 210, "total_steps": 3350, "loss": 0.0289, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9599442066789035e-05, "epoch": 0.31343283582089554, "percentage": 6.27, "elapsed_time": "0:01:12", "remaining_time": "0:17:57"}
24
+ {"current_steps": 220, "total_steps": 3350, "loss": 0.0188, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.95562969335437e-05, "epoch": 0.3283582089552239, "percentage": 6.57, "elapsed_time": "0:01:15", "remaining_time": "0:17:48"}
25
+ {"current_steps": 230, "total_steps": 3350, "loss": 0.0232, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9510966199033174e-05, "epoch": 0.34328358208955223, "percentage": 6.87, "elapsed_time": "0:01:18", "remaining_time": "0:17:40"}
26
+ {"current_steps": 240, "total_steps": 3350, "loss": 0.0232, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.946345389786049e-05, "epoch": 0.3582089552238806, "percentage": 7.16, "elapsed_time": "0:01:21", "remaining_time": "0:17:30"}
27
+ {"current_steps": 250, "total_steps": 3350, "loss": 0.0315, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.941376425879624e-05, "epoch": 0.373134328358209, "percentage": 7.46, "elapsed_time": "0:01:23", "remaining_time": "0:17:21"}
28
+ {"current_steps": 260, "total_steps": 3350, "loss": 0.0165, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.936190170440208e-05, "epoch": 0.3880597014925373, "percentage": 7.76, "elapsed_time": "0:01:27", "remaining_time": "0:17:14"}
29
+ {"current_steps": 270, "total_steps": 3350, "loss": 0.025, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.930787085063723e-05, "epoch": 0.40298507462686567, "percentage": 8.06, "elapsed_time": "0:01:30", "remaining_time": "0:17:08"}
30
+ {"current_steps": 280, "total_steps": 3350, "loss": 0.0195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.925167650644752e-05, "epoch": 0.417910447761194, "percentage": 8.36, "elapsed_time": "0:01:33", "remaining_time": "0:16:59"}
31
+ {"current_steps": 290, "total_steps": 3350, "loss": 0.0324, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9193323673337476e-05, "epoch": 0.43283582089552236, "percentage": 8.66, "elapsed_time": "0:01:36", "remaining_time": "0:16:53"}
32
+ {"current_steps": 300, "total_steps": 3350, "loss": 0.0246, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9132817544925085e-05, "epoch": 0.44776119402985076, "percentage": 8.96, "elapsed_time": "0:01:38", "remaining_time": "0:16:44"}
33
+ {"current_steps": 300, "total_steps": 3350, "loss": null, "eval_loss": 0.01584860309958458, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.44776119402985076, "percentage": 8.96, "elapsed_time": "0:01:38", "remaining_time": "0:16:44"}
34
+ {"current_steps": 310, "total_steps": 3350, "loss": 0.0124, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.907016350647961e-05, "epoch": 0.4626865671641791, "percentage": 9.25, "elapsed_time": "0:01:46", "remaining_time": "0:17:19"}
35
+ {"current_steps": 320, "total_steps": 3350, "loss": 0.0192, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.9005367134442235e-05, "epoch": 0.47761194029850745, "percentage": 9.55, "elapsed_time": "0:01:48", "remaining_time": "0:17:11"}
36
+ {"current_steps": 330, "total_steps": 3350, "loss": 0.0453, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.893843419592977e-05, "epoch": 0.4925373134328358, "percentage": 9.85, "elapsed_time": "0:01:51", "remaining_time": "0:17:02"}
37
+ {"current_steps": 340, "total_steps": 3350, "loss": 0.0186, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.886937064822134e-05, "epoch": 0.5074626865671642, "percentage": 10.15, "elapsed_time": "0:01:54", "remaining_time": "0:16:55"}
38
+ {"current_steps": 350, "total_steps": 3350, "loss": 0.0298, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8798182638228166e-05, "epoch": 0.5223880597014925, "percentage": 10.45, "elapsed_time": "0:01:57", "remaining_time": "0:16:49"}
39
+ {"current_steps": 360, "total_steps": 3350, "loss": 0.0201, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.872487650194647e-05, "epoch": 0.5373134328358209, "percentage": 10.75, "elapsed_time": "0:02:01", "remaining_time": "0:16:47"}
40
+ {"current_steps": 370, "total_steps": 3350, "loss": 0.0248, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.864945876389356e-05, "epoch": 0.5522388059701493, "percentage": 11.04, "elapsed_time": "0:02:04", "remaining_time": "0:16:40"}
41
+ {"current_steps": 380, "total_steps": 3350, "loss": 0.0128, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.857193613652711e-05, "epoch": 0.5671641791044776, "percentage": 11.34, "elapsed_time": "0:02:07", "remaining_time": "0:16:34"}
42
+ {"current_steps": 390, "total_steps": 3350, "loss": 0.0148, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.849231551964771e-05, "epoch": 0.582089552238806, "percentage": 11.64, "elapsed_time": "0:02:10", "remaining_time": "0:16:27"}
43
+ {"current_steps": 400, "total_steps": 3350, "loss": 0.031, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.841060399978481e-05, "epoch": 0.5970149253731343, "percentage": 11.94, "elapsed_time": "0:02:12", "remaining_time": "0:16:20"}
44
+ {"current_steps": 400, "total_steps": 3350, "loss": null, "eval_loss": 0.013003222644329071, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5970149253731343, "percentage": 11.94, "elapsed_time": "0:02:12", "remaining_time": "0:16:20"}
45
+ {"current_steps": 410, "total_steps": 3350, "loss": 0.0099, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.8326808849565936e-05, "epoch": 0.6119402985074627, "percentage": 12.24, "elapsed_time": "0:02:19", "remaining_time": "0:16:40"}
46
+ {"current_steps": 420, "total_steps": 3350, "loss": 0.0221, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.824093752706943e-05, "epoch": 0.6268656716417911, "percentage": 12.54, "elapsed_time": "0:02:22", "remaining_time": "0:16:33"}
47
+ {"current_steps": 430, "total_steps": 3350, "loss": 0.0093, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.815299767516065e-05, "epoch": 0.6417910447761194, "percentage": 12.84, "elapsed_time": "0:02:25", "remaining_time": "0:16:28"}
48
+ {"current_steps": 440, "total_steps": 3350, "loss": 0.0124, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.806299712081172e-05, "epoch": 0.6567164179104478, "percentage": 13.13, "elapsed_time": "0:02:28", "remaining_time": "0:16:23"}
49
+ {"current_steps": 450, "total_steps": 3350, "loss": 0.0018, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.797094387440491e-05, "epoch": 0.6716417910447762, "percentage": 13.43, "elapsed_time": "0:02:31", "remaining_time": "0:16:17"}
50
+ {"current_steps": 460, "total_steps": 3350, "loss": 0.0186, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.787684612901965e-05, "epoch": 0.6865671641791045, "percentage": 13.73, "elapsed_time": "0:02:34", "remaining_time": "0:16:11"}
51
+ {"current_steps": 470, "total_steps": 3350, "loss": 0.0268, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.77807122597034e-05, "epoch": 0.7014925373134329, "percentage": 14.03, "elapsed_time": "0:02:37", "remaining_time": "0:16:07"}
52
+ {"current_steps": 480, "total_steps": 3350, "loss": 0.0237, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.768255082272611e-05, "epoch": 0.7164179104477612, "percentage": 14.33, "elapsed_time": "0:02:40", "remaining_time": "0:16:01"}
53
+ {"current_steps": 490, "total_steps": 3350, "loss": 0.0176, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.758237055481881e-05, "epoch": 0.7313432835820896, "percentage": 14.63, "elapsed_time": "0:02:43", "remaining_time": "0:15:55"}
54
+ {"current_steps": 500, "total_steps": 3350, "loss": 0.0254, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.748018037239592e-05, "epoch": 0.746268656716418, "percentage": 14.93, "elapsed_time": "0:02:46", "remaining_time": "0:15:51"}
55
+ {"current_steps": 500, "total_steps": 3350, "loss": null, "eval_loss": 0.01669371873140335, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.746268656716418, "percentage": 14.93, "elapsed_time": "0:02:46", "remaining_time": "0:15:51"}
56
+ {"current_steps": 510, "total_steps": 3350, "loss": 0.0328, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7375989370761695e-05, "epoch": 0.7611940298507462, "percentage": 15.22, "elapsed_time": "0:02:53", "remaining_time": "0:16:06"}
57
+ {"current_steps": 520, "total_steps": 3350, "loss": 0.0166, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.726980682330071e-05, "epoch": 0.7761194029850746, "percentage": 15.52, "elapsed_time": "0:02:56", "remaining_time": "0:15:59"}
58
+ {"current_steps": 530, "total_steps": 3350, "loss": 0.0163, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7161642180652464e-05, "epoch": 0.7910447761194029, "percentage": 15.82, "elapsed_time": "0:02:59", "remaining_time": "0:15:53"}
59
+ {"current_steps": 540, "total_steps": 3350, "loss": 0.0277, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.7051505069870286e-05, "epoch": 0.8059701492537313, "percentage": 16.12, "elapsed_time": "0:03:02", "remaining_time": "0:15:48"}
60
+ {"current_steps": 550, "total_steps": 3350, "loss": 0.032, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.693940529356444e-05, "epoch": 0.8208955223880597, "percentage": 16.42, "elapsed_time": "0:03:05", "remaining_time": "0:15:43"}
61
+ {"current_steps": 560, "total_steps": 3350, "loss": 0.0182, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6825352829029705e-05, "epoch": 0.835820895522388, "percentage": 16.72, "elapsed_time": "0:03:08", "remaining_time": "0:15:37"}
62
+ {"current_steps": 570, "total_steps": 3350, "loss": 0.0133, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.670935782735732e-05, "epoch": 0.8507462686567164, "percentage": 17.01, "elapsed_time": "0:03:11", "remaining_time": "0:15:32"}
63
+ {"current_steps": 580, "total_steps": 3350, "loss": 0.0312, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6591430612531515e-05, "epoch": 0.8656716417910447, "percentage": 17.31, "elapsed_time": "0:03:14", "remaining_time": "0:15:29"}
64
+ {"current_steps": 590, "total_steps": 3350, "loss": 0.0268, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.647158168051066e-05, "epoch": 0.8805970149253731, "percentage": 17.61, "elapsed_time": "0:03:17", "remaining_time": "0:15:23"}
65
+ {"current_steps": 600, "total_steps": 3350, "loss": 0.0343, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6349821698293025e-05, "epoch": 0.8955223880597015, "percentage": 17.91, "elapsed_time": "0:03:20", "remaining_time": "0:15:19"}
66
+ {"current_steps": 600, "total_steps": 3350, "loss": null, "eval_loss": 0.017083635553717613, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.8955223880597015, "percentage": 17.91, "elapsed_time": "0:03:20", "remaining_time": "0:15:19"}
67
+ {"current_steps": 610, "total_steps": 3350, "loss": 0.0148, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.622616150296745e-05, "epoch": 0.9104477611940298, "percentage": 18.21, "elapsed_time": "0:03:27", "remaining_time": "0:15:31"}
68
+ {"current_steps": 620, "total_steps": 3350, "loss": 0.0236, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6100612100748765e-05, "epoch": 0.9253731343283582, "percentage": 18.51, "elapsed_time": "0:03:30", "remaining_time": "0:15:25"}
69
+ {"current_steps": 630, "total_steps": 3350, "loss": 0.0239, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5973184665998186e-05, "epoch": 0.9402985074626866, "percentage": 18.81, "elapsed_time": "0:03:32", "remaining_time": "0:15:19"}
70
+ {"current_steps": 640, "total_steps": 3350, "loss": 0.0126, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5843890540228794e-05, "epoch": 0.9552238805970149, "percentage": 19.1, "elapsed_time": "0:03:36", "remaining_time": "0:15:15"}
71
+ {"current_steps": 650, "total_steps": 3350, "loss": 0.0079, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.571274123109606e-05, "epoch": 0.9701492537313433, "percentage": 19.4, "elapsed_time": "0:03:39", "remaining_time": "0:15:10"}
72
+ {"current_steps": 660, "total_steps": 3350, "loss": 0.017, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.557974841137364e-05, "epoch": 0.9850746268656716, "percentage": 19.7, "elapsed_time": "0:03:42", "remaining_time": "0:15:05"}
73
+ {"current_steps": 670, "total_steps": 3350, "loss": 0.0226, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.544492391791445e-05, "epoch": 1.0, "percentage": 20.0, "elapsed_time": "0:03:45", "remaining_time": "0:15:01"}
74
+ {"current_steps": 680, "total_steps": 3350, "loss": 0.0069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.530827975059715e-05, "epoch": 1.0149253731343284, "percentage": 20.3, "elapsed_time": "0:03:48", "remaining_time": "0:14:57"}
75
+ {"current_steps": 690, "total_steps": 3350, "loss": 0.0044, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5169828071258116e-05, "epoch": 1.0298507462686568, "percentage": 20.6, "elapsed_time": "0:03:51", "remaining_time": "0:14:51"}
76
+ {"current_steps": 700, "total_steps": 3350, "loss": 0.0057, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.502958120260894e-05, "epoch": 1.044776119402985, "percentage": 20.9, "elapsed_time": "0:03:54", "remaining_time": "0:14:47"}
77
+ {"current_steps": 700, "total_steps": 3350, "loss": null, "eval_loss": 0.012991434894502163, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.044776119402985, "percentage": 20.9, "elapsed_time": "0:03:54", "remaining_time": "0:14:47"}
78
+ {"current_steps": 700, "total_steps": 3350, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.044776119402985, "percentage": 20.9, "elapsed_time": "0:03:54", "remaining_time": "0:14:47"}
79
+ {"current_steps": 36, "total_steps": 36, "loss": null, "eval_loss": 0.012991434894502163, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.044776119402985, "percentage": 100.0, "elapsed_time": "0:04:01", "remaining_time": "0:00:00"}
llama3_8b_peft/goal_step_wikihow/trainer_state.json ADDED
@@ -0,0 +1,576 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.012991434894502163,
3
+ "best_model_checkpoint": "ckpt/llama3_8b_fuze27_no_sys/goal_step_wikihow_no_sys/checkpoint-700",
4
+ "epoch": 1.044776119402985,
5
+ "eval_steps": 100,
6
+ "global_step": 700,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.014925373134328358,
13
+ "grad_norm": 8.14726734161377,
14
+ "learning_rate": 2.5e-05,
15
+ "loss": 1.7141,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.029850746268656716,
20
+ "grad_norm": 1.2164829969406128,
21
+ "learning_rate": 5e-05,
22
+ "loss": 0.7203,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.04477611940298507,
27
+ "grad_norm": 1.7879401445388794,
28
+ "learning_rate": 4.999888745376028e-05,
29
+ "loss": 0.0767,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.05970149253731343,
34
+ "grad_norm": 2.841306447982788,
35
+ "learning_rate": 4.9995549914061836e-05,
36
+ "loss": 0.1053,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.07462686567164178,
41
+ "grad_norm": 1.0241464376449585,
42
+ "learning_rate": 4.998998767795805e-05,
43
+ "loss": 0.0619,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.08955223880597014,
48
+ "grad_norm": 0.3842061758041382,
49
+ "learning_rate": 4.99822012405085e-05,
50
+ "loss": 0.0472,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.1044776119402985,
55
+ "grad_norm": 1.2642300128936768,
56
+ "learning_rate": 4.997219129473495e-05,
57
+ "loss": 0.0514,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.11940298507462686,
62
+ "grad_norm": 6.510857105255127,
63
+ "learning_rate": 4.995995873155958e-05,
64
+ "loss": 0.0441,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.13432835820895522,
69
+ "grad_norm": 1.8673882484436035,
70
+ "learning_rate": 4.994550463972577e-05,
71
+ "loss": 0.0507,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.14925373134328357,
76
+ "grad_norm": 0.7425503134727478,
77
+ "learning_rate": 4.992883030570116e-05,
78
+ "loss": 0.0401,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.14925373134328357,
83
+ "eval_loss": 0.029515134170651436,
84
+ "eval_runtime": 3.2081,
85
+ "eval_samples_per_second": 88.216,
86
+ "eval_steps_per_second": 11.222,
87
+ "step": 100
88
+ },
89
+ {
90
+ "epoch": 0.16417910447761194,
91
+ "grad_norm": 0.8024762272834778,
92
+ "learning_rate": 4.9909937213563165e-05,
93
+ "loss": 0.0491,
94
+ "step": 110
95
+ },
96
+ {
97
+ "epoch": 0.1791044776119403,
98
+ "grad_norm": 1.1800884008407593,
99
+ "learning_rate": 4.988882704486687e-05,
100
+ "loss": 0.0251,
101
+ "step": 120
102
+ },
103
+ {
104
+ "epoch": 0.19402985074626866,
105
+ "grad_norm": 0.11055939644575119,
106
+ "learning_rate": 4.9865501678495375e-05,
107
+ "loss": 0.0366,
108
+ "step": 130
109
+ },
110
+ {
111
+ "epoch": 0.208955223880597,
112
+ "grad_norm": 4.5951247215271,
113
+ "learning_rate": 4.9839963190492576e-05,
114
+ "loss": 0.0313,
115
+ "step": 140
116
+ },
117
+ {
118
+ "epoch": 0.22388059701492538,
119
+ "grad_norm": 1.370669960975647,
120
+ "learning_rate": 4.9812213853878376e-05,
121
+ "loss": 0.037,
122
+ "step": 150
123
+ },
124
+ {
125
+ "epoch": 0.23880597014925373,
126
+ "grad_norm": 4.61948823928833,
127
+ "learning_rate": 4.978225613844639e-05,
128
+ "loss": 0.0211,
129
+ "step": 160
130
+ },
131
+ {
132
+ "epoch": 0.2537313432835821,
133
+ "grad_norm": 1.5214568376541138,
134
+ "learning_rate": 4.975009271054409e-05,
135
+ "loss": 0.0346,
136
+ "step": 170
137
+ },
138
+ {
139
+ "epoch": 0.26865671641791045,
140
+ "grad_norm": 0.7119271159172058,
141
+ "learning_rate": 4.971572643283557e-05,
142
+ "loss": 0.0183,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 0.2835820895522388,
147
+ "grad_norm": 0.2844345271587372,
148
+ "learning_rate": 4.9679160364046644e-05,
149
+ "loss": 0.0244,
150
+ "step": 190
151
+ },
152
+ {
153
+ "epoch": 0.29850746268656714,
154
+ "grad_norm": 2.362858295440674,
155
+ "learning_rate": 4.9640397758692715e-05,
156
+ "loss": 0.0476,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.29850746268656714,
161
+ "eval_loss": 0.014047912321984768,
162
+ "eval_runtime": 3.2089,
163
+ "eval_samples_per_second": 88.193,
164
+ "eval_steps_per_second": 11.219,
165
+ "step": 200
166
+ },
167
+ {
168
+ "epoch": 0.31343283582089554,
169
+ "grad_norm": 0.5341858267784119,
170
+ "learning_rate": 4.9599442066789035e-05,
171
+ "loss": 0.0289,
172
+ "step": 210
173
+ },
174
+ {
175
+ "epoch": 0.3283582089552239,
176
+ "grad_norm": 0.9342235326766968,
177
+ "learning_rate": 4.95562969335437e-05,
178
+ "loss": 0.0188,
179
+ "step": 220
180
+ },
181
+ {
182
+ "epoch": 0.34328358208955223,
183
+ "grad_norm": 0.43300461769104004,
184
+ "learning_rate": 4.9510966199033174e-05,
185
+ "loss": 0.0232,
186
+ "step": 230
187
+ },
188
+ {
189
+ "epoch": 0.3582089552238806,
190
+ "grad_norm": 0.17352116107940674,
191
+ "learning_rate": 4.946345389786049e-05,
192
+ "loss": 0.0232,
193
+ "step": 240
194
+ },
195
+ {
196
+ "epoch": 0.373134328358209,
197
+ "grad_norm": 1.7074532508850098,
198
+ "learning_rate": 4.941376425879624e-05,
199
+ "loss": 0.0315,
200
+ "step": 250
201
+ },
202
+ {
203
+ "epoch": 0.3880597014925373,
204
+ "grad_norm": 0.3088224232196808,
205
+ "learning_rate": 4.936190170440208e-05,
206
+ "loss": 0.0165,
207
+ "step": 260
208
+ },
209
+ {
210
+ "epoch": 0.40298507462686567,
211
+ "grad_norm": 0.6789383292198181,
212
+ "learning_rate": 4.930787085063723e-05,
213
+ "loss": 0.025,
214
+ "step": 270
215
+ },
216
+ {
217
+ "epoch": 0.417910447761194,
218
+ "grad_norm": 0.23857185244560242,
219
+ "learning_rate": 4.925167650644752e-05,
220
+ "loss": 0.0195,
221
+ "step": 280
222
+ },
223
+ {
224
+ "epoch": 0.43283582089552236,
225
+ "grad_norm": 0.1765979677438736,
226
+ "learning_rate": 4.9193323673337476e-05,
227
+ "loss": 0.0324,
228
+ "step": 290
229
+ },
230
+ {
231
+ "epoch": 0.44776119402985076,
232
+ "grad_norm": 0.21789054572582245,
233
+ "learning_rate": 4.9132817544925085e-05,
234
+ "loss": 0.0246,
235
+ "step": 300
236
+ },
237
+ {
238
+ "epoch": 0.44776119402985076,
239
+ "eval_loss": 0.01584860309958458,
240
+ "eval_runtime": 3.2304,
241
+ "eval_samples_per_second": 87.604,
242
+ "eval_steps_per_second": 11.144,
243
+ "step": 300
244
+ },
245
+ {
246
+ "epoch": 0.4626865671641791,
247
+ "grad_norm": 0.03349956497550011,
248
+ "learning_rate": 4.907016350647961e-05,
249
+ "loss": 0.0124,
250
+ "step": 310
251
+ },
252
+ {
253
+ "epoch": 0.47761194029850745,
254
+ "grad_norm": 4.244650363922119,
255
+ "learning_rate": 4.9005367134442235e-05,
256
+ "loss": 0.0192,
257
+ "step": 320
258
+ },
259
+ {
260
+ "epoch": 0.4925373134328358,
261
+ "grad_norm": 2.6396820545196533,
262
+ "learning_rate": 4.893843419592977e-05,
263
+ "loss": 0.0453,
264
+ "step": 330
265
+ },
266
+ {
267
+ "epoch": 0.5074626865671642,
268
+ "grad_norm": 0.09837789833545685,
269
+ "learning_rate": 4.886937064822134e-05,
270
+ "loss": 0.0186,
271
+ "step": 340
272
+ },
273
+ {
274
+ "epoch": 0.5223880597014925,
275
+ "grad_norm": 0.5431476831436157,
276
+ "learning_rate": 4.8798182638228166e-05,
277
+ "loss": 0.0298,
278
+ "step": 350
279
+ },
280
+ {
281
+ "epoch": 0.5373134328358209,
282
+ "grad_norm": 0.36697104573249817,
283
+ "learning_rate": 4.872487650194647e-05,
284
+ "loss": 0.0201,
285
+ "step": 360
286
+ },
287
+ {
288
+ "epoch": 0.5522388059701493,
289
+ "grad_norm": 0.7214462757110596,
290
+ "learning_rate": 4.864945876389356e-05,
291
+ "loss": 0.0248,
292
+ "step": 370
293
+ },
294
+ {
295
+ "epoch": 0.5671641791044776,
296
+ "grad_norm": 1.7586629390716553,
297
+ "learning_rate": 4.857193613652711e-05,
298
+ "loss": 0.0128,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 0.582089552238806,
303
+ "grad_norm": 0.7505581974983215,
304
+ "learning_rate": 4.849231551964771e-05,
305
+ "loss": 0.0148,
306
+ "step": 390
307
+ },
308
+ {
309
+ "epoch": 0.5970149253731343,
310
+ "grad_norm": 2.8461360931396484,
311
+ "learning_rate": 4.841060399978481e-05,
312
+ "loss": 0.031,
313
+ "step": 400
314
+ },
315
+ {
316
+ "epoch": 0.5970149253731343,
317
+ "eval_loss": 0.013003222644329071,
318
+ "eval_runtime": 3.2553,
319
+ "eval_samples_per_second": 86.936,
320
+ "eval_steps_per_second": 11.059,
321
+ "step": 400
322
+ },
323
+ {
324
+ "epoch": 0.6119402985074627,
325
+ "grad_norm": 0.7241692543029785,
326
+ "learning_rate": 4.8326808849565936e-05,
327
+ "loss": 0.0099,
328
+ "step": 410
329
+ },
330
+ {
331
+ "epoch": 0.6268656716417911,
332
+ "grad_norm": 0.3313581943511963,
333
+ "learning_rate": 4.824093752706943e-05,
334
+ "loss": 0.0221,
335
+ "step": 420
336
+ },
337
+ {
338
+ "epoch": 0.6417910447761194,
339
+ "grad_norm": 1.4017741680145264,
340
+ "learning_rate": 4.815299767516065e-05,
341
+ "loss": 0.0093,
342
+ "step": 430
343
+ },
344
+ {
345
+ "epoch": 0.6567164179104478,
346
+ "grad_norm": 0.006704133003950119,
347
+ "learning_rate": 4.806299712081172e-05,
348
+ "loss": 0.0124,
349
+ "step": 440
350
+ },
351
+ {
352
+ "epoch": 0.6716417910447762,
353
+ "grad_norm": 0.03371915966272354,
354
+ "learning_rate": 4.797094387440491e-05,
355
+ "loss": 0.0018,
356
+ "step": 450
357
+ },
358
+ {
359
+ "epoch": 0.6865671641791045,
360
+ "grad_norm": 3.373279333114624,
361
+ "learning_rate": 4.787684612901965e-05,
362
+ "loss": 0.0186,
363
+ "step": 460
364
+ },
365
+ {
366
+ "epoch": 0.7014925373134329,
367
+ "grad_norm": 2.0351741313934326,
368
+ "learning_rate": 4.77807122597034e-05,
369
+ "loss": 0.0268,
370
+ "step": 470
371
+ },
372
+ {
373
+ "epoch": 0.7164179104477612,
374
+ "grad_norm": 0.08731024712324142,
375
+ "learning_rate": 4.768255082272611e-05,
376
+ "loss": 0.0237,
377
+ "step": 480
378
+ },
379
+ {
380
+ "epoch": 0.7313432835820896,
381
+ "grad_norm": 1.5778895616531372,
382
+ "learning_rate": 4.758237055481881e-05,
383
+ "loss": 0.0176,
384
+ "step": 490
385
+ },
386
+ {
387
+ "epoch": 0.746268656716418,
388
+ "grad_norm": 0.40934962034225464,
389
+ "learning_rate": 4.748018037239592e-05,
390
+ "loss": 0.0254,
391
+ "step": 500
392
+ },
393
+ {
394
+ "epoch": 0.746268656716418,
395
+ "eval_loss": 0.01669371873140335,
396
+ "eval_runtime": 3.2533,
397
+ "eval_samples_per_second": 86.988,
398
+ "eval_steps_per_second": 11.066,
399
+ "step": 500
400
+ },
401
+ {
402
+ "epoch": 0.7611940298507462,
403
+ "grad_norm": 2.0718159675598145,
404
+ "learning_rate": 4.7375989370761695e-05,
405
+ "loss": 0.0328,
406
+ "step": 510
407
+ },
408
+ {
409
+ "epoch": 0.7761194029850746,
410
+ "grad_norm": 0.26902905106544495,
411
+ "learning_rate": 4.726980682330071e-05,
412
+ "loss": 0.0166,
413
+ "step": 520
414
+ },
415
+ {
416
+ "epoch": 0.7910447761194029,
417
+ "grad_norm": 0.2976718246936798,
418
+ "learning_rate": 4.7161642180652464e-05,
419
+ "loss": 0.0163,
420
+ "step": 530
421
+ },
422
+ {
423
+ "epoch": 0.8059701492537313,
424
+ "grad_norm": 1.0103188753128052,
425
+ "learning_rate": 4.7051505069870286e-05,
426
+ "loss": 0.0277,
427
+ "step": 540
428
+ },
429
+ {
430
+ "epoch": 0.8208955223880597,
431
+ "grad_norm": 2.193516969680786,
432
+ "learning_rate": 4.693940529356444e-05,
433
+ "loss": 0.032,
434
+ "step": 550
435
+ },
436
+ {
437
+ "epoch": 0.835820895522388,
438
+ "grad_norm": 0.06206831708550453,
439
+ "learning_rate": 4.6825352829029705e-05,
440
+ "loss": 0.0182,
441
+ "step": 560
442
+ },
443
+ {
444
+ "epoch": 0.8507462686567164,
445
+ "grad_norm": 0.9270901083946228,
446
+ "learning_rate": 4.670935782735732e-05,
447
+ "loss": 0.0133,
448
+ "step": 570
449
+ },
450
+ {
451
+ "epoch": 0.8656716417910447,
452
+ "grad_norm": 2.6464357376098633,
453
+ "learning_rate": 4.6591430612531515e-05,
454
+ "loss": 0.0312,
455
+ "step": 580
456
+ },
457
+ {
458
+ "epoch": 0.8805970149253731,
459
+ "grad_norm": 0.317035436630249,
460
+ "learning_rate": 4.647158168051066e-05,
461
+ "loss": 0.0268,
462
+ "step": 590
463
+ },
464
+ {
465
+ "epoch": 0.8955223880597015,
466
+ "grad_norm": 1.4276162385940552,
467
+ "learning_rate": 4.6349821698293025e-05,
468
+ "loss": 0.0343,
469
+ "step": 600
470
+ },
471
+ {
472
+ "epoch": 0.8955223880597015,
473
+ "eval_loss": 0.017083635553717613,
474
+ "eval_runtime": 3.2547,
475
+ "eval_samples_per_second": 86.951,
476
+ "eval_steps_per_second": 11.061,
477
+ "step": 600
478
+ },
479
+ {
480
+ "epoch": 0.9104477611940298,
481
+ "grad_norm": 0.7979409694671631,
482
+ "learning_rate": 4.622616150296745e-05,
483
+ "loss": 0.0148,
484
+ "step": 610
485
+ },
486
+ {
487
+ "epoch": 0.9253731343283582,
488
+ "grad_norm": 3.8947601318359375,
489
+ "learning_rate": 4.6100612100748765e-05,
490
+ "loss": 0.0236,
491
+ "step": 620
492
+ },
493
+ {
494
+ "epoch": 0.9402985074626866,
495
+ "grad_norm": 2.535515069961548,
496
+ "learning_rate": 4.5973184665998186e-05,
497
+ "loss": 0.0239,
498
+ "step": 630
499
+ },
500
+ {
501
+ "epoch": 0.9552238805970149,
502
+ "grad_norm": 0.07634054869413376,
503
+ "learning_rate": 4.5843890540228794e-05,
504
+ "loss": 0.0126,
505
+ "step": 640
506
+ },
507
+ {
508
+ "epoch": 0.9701492537313433,
509
+ "grad_norm": 0.07099244743585587,
510
+ "learning_rate": 4.571274123109606e-05,
511
+ "loss": 0.0079,
512
+ "step": 650
513
+ },
514
+ {
515
+ "epoch": 0.9850746268656716,
516
+ "grad_norm": 1.9397854804992676,
517
+ "learning_rate": 4.557974841137364e-05,
518
+ "loss": 0.017,
519
+ "step": 660
520
+ },
521
+ {
522
+ "epoch": 1.0,
523
+ "grad_norm": 0.3241177499294281,
524
+ "learning_rate": 4.544492391791445e-05,
525
+ "loss": 0.0226,
526
+ "step": 670
527
+ },
528
+ {
529
+ "epoch": 1.0149253731343284,
530
+ "grad_norm": 0.12523053586483002,
531
+ "learning_rate": 4.530827975059715e-05,
532
+ "loss": 0.0069,
533
+ "step": 680
534
+ },
535
+ {
536
+ "epoch": 1.0298507462686568,
537
+ "grad_norm": 0.3251120448112488,
538
+ "learning_rate": 4.5169828071258116e-05,
539
+ "loss": 0.0044,
540
+ "step": 690
541
+ },
542
+ {
543
+ "epoch": 1.044776119402985,
544
+ "grad_norm": 0.15444570779800415,
545
+ "learning_rate": 4.502958120260894e-05,
546
+ "loss": 0.0057,
547
+ "step": 700
548
+ },
549
+ {
550
+ "epoch": 1.044776119402985,
551
+ "eval_loss": 0.012991434894502163,
552
+ "eval_runtime": 3.2577,
553
+ "eval_samples_per_second": 86.87,
554
+ "eval_steps_per_second": 11.051,
555
+ "step": 700
556
+ },
557
+ {
558
+ "epoch": 1.044776119402985,
559
+ "step": 700,
560
+ "total_flos": 3.130965478814515e+16,
561
+ "train_loss": 0.060939116749380316,
562
+ "train_runtime": 244.1628,
563
+ "train_samples_per_second": 109.763,
564
+ "train_steps_per_second": 13.72
565
+ }
566
+ ],
567
+ "logging_steps": 10,
568
+ "max_steps": 3350,
569
+ "num_input_tokens_seen": 0,
570
+ "num_train_epochs": 5,
571
+ "save_steps": 100,
572
+ "total_flos": 3.130965478814515e+16,
573
+ "train_batch_size": 8,
574
+ "trial_name": null,
575
+ "trial_params": null
576
+ }
llama3_8b_peft/goal_step_wikihow/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:224c1a42567e1655bc6599547db63f6cadc7acf143743e1cfed8cbeed4981427
3
+ size 5176
llama3_8b_peft/goal_step_wikihow/training_eval_loss.png ADDED
llama3_8b_peft/goal_step_wikihow/training_loss.png ADDED
llama3_8b_peft/gsm8k/README.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: gsm8k_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # gsm8k_no_sys
18
+
19
+ This model is a fine-tuned version of a local Llama3-8b base checkpoint (`/data1/model/llama3/unsloth/Llama3-8b`) on the gsm8k_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.4556
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 1e-05
41
+ - train_batch_size: 8
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 2
46
+ - total_train_batch_size: 16
47
+ - total_eval_batch_size: 16
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: cosine
50
+ - lr_scheduler_warmup_steps: 20
51
+ - num_epochs: 10.0
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss |
56
+ |:-------------:|:------:|:----:|:---------------:|
57
+ | 0.5398 | 0.5038 | 200 | 0.5354 |
58
+ | 0.5337 | 1.0076 | 400 | 0.5050 |
59
+ | 0.5133 | 1.5113 | 600 | 0.4899 |
60
+ | 0.4912 | 2.0151 | 800 | 0.4774 |
61
+ | 0.4573 | 2.5189 | 1000 | 0.4706 |
62
+ | 0.4628 | 3.0227 | 1200 | 0.4644 |
63
+ | 0.4429 | 3.5264 | 1400 | 0.4594 |
64
+ | 0.4058 | 4.0302 | 1600 | 0.4588 |
65
+ | 0.4365 | 4.5340 | 1800 | 0.4559 |
66
+ | 0.4189 | 5.0378 | 2000 | 0.4556 |
67
+ | 0.4096 | 5.5416 | 2200 | 0.4561 |
68
+ | 0.4087 | 6.0453 | 2400 | 0.4592 |
69
+ | 0.4119 | 6.5491 | 2600 | 0.4569 |
70
+ | 0.4103 | 7.0529 | 2800 | 0.4607 |
71
+
72
+
73
+ ### Framework versions
74
+
75
+ - PEFT 0.10.0
76
+ - Transformers 4.40.0
77
+ - Pytorch 2.2.1
78
+ - Datasets 2.18.0
79
+ - Tokenizers 0.19.1
llama3_8b_peft/gsm8k/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "up_proj",
25
+ "v_proj",
26
+ "o_proj",
27
+ "down_proj",
28
+ "k_proj",
29
+ "gate_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/gsm8k/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3420ecdf6da003cab56f7393f669677dd28564cf199f4f73b7fe9dbcdefd708
3
+ size 83945296
llama3_8b_peft/gsm8k/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.052896725440806,
3
+ "eval_loss": 0.4556237757205963,
4
+ "eval_runtime": 13.2543,
5
+ "eval_samples_per_second": 84.577,
6
+ "eval_steps_per_second": 5.357,
7
+ "total_flos": 5.301631426725151e+17,
8
+ "train_loss": 0.4707381465605327,
9
+ "train_runtime": 2143.7031,
10
+ "train_samples_per_second": 29.631,
11
+ "train_steps_per_second": 1.852
12
+ }
llama3_8b_peft/gsm8k/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.052896725440806,
3
+ "eval_loss": 0.4556237757205963,
4
+ "eval_runtime": 13.2543,
5
+ "eval_samples_per_second": 84.577,
6
+ "eval_steps_per_second": 5.357
7
+ }
llama3_8b_peft/gsm8k/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/gsm8k/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/gsm8k/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }
llama3_8b_peft/gsm8k/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7.052896725440806,
3
+ "total_flos": 5.301631426725151e+17,
4
+ "train_loss": 0.4707381465605327,
5
+ "train_runtime": 2143.7031,
6
+ "train_samples_per_second": 29.631,
7
+ "train_steps_per_second": 1.852
8
+ }
llama3_8b_peft/gsm8k/trainer_log.jsonl ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 3970, "loss": 1.2636, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5e-06, "epoch": 0.02518891687657431, "percentage": 0.25, "elapsed_time": "0:00:07", "remaining_time": "0:48:28"}
2
+ {"current_steps": 20, "total_steps": 3970, "loss": 1.2109, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 1e-05, "epoch": 0.05037783375314862, "percentage": 0.5, "elapsed_time": "0:00:13", "remaining_time": "0:45:34"}
3
+ {"current_steps": 30, "total_steps": 3970, "loss": 1.2202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.999841859439597e-06, "epoch": 0.07556675062972293, "percentage": 0.76, "elapsed_time": "0:00:20", "remaining_time": "0:44:35"}
4
+ {"current_steps": 40, "total_steps": 3970, "loss": 1.1029, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.999367447761763e-06, "epoch": 0.10075566750629723, "percentage": 1.01, "elapsed_time": "0:00:26", "remaining_time": "0:43:18"}
5
+ {"current_steps": 50, "total_steps": 3970, "loss": 0.9679, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.998576794975987e-06, "epoch": 0.12594458438287154, "percentage": 1.26, "elapsed_time": "0:00:32", "remaining_time": "0:41:57"}
6
+ {"current_steps": 60, "total_steps": 3970, "loss": 0.8076, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.997469951095982e-06, "epoch": 0.15113350125944586, "percentage": 1.51, "elapsed_time": "0:00:38", "remaining_time": "0:42:02"}
7
+ {"current_steps": 70, "total_steps": 3970, "loss": 0.7149, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.99604698613651e-06, "epoch": 0.17632241813602015, "percentage": 1.76, "elapsed_time": "0:00:45", "remaining_time": "0:42:02"}
8
+ {"current_steps": 80, "total_steps": 3970, "loss": 0.6482, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.994307990108963e-06, "epoch": 0.20151133501259447, "percentage": 2.02, "elapsed_time": "0:00:51", "remaining_time": "0:41:52"}
9
+ {"current_steps": 90, "total_steps": 3970, "loss": 0.629, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.992253073015664e-06, "epoch": 0.22670025188916876, "percentage": 2.27, "elapsed_time": "0:00:58", "remaining_time": "0:41:41"}
10
+ {"current_steps": 100, "total_steps": 3970, "loss": 0.589, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.989882364842906e-06, "epoch": 0.2518891687657431, "percentage": 2.52, "elapsed_time": "0:01:04", "remaining_time": "0:41:19"}
11
+ {"current_steps": 110, "total_steps": 3970, "loss": 0.6087, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.987196015552742e-06, "epoch": 0.2770780856423174, "percentage": 2.77, "elapsed_time": "0:01:10", "remaining_time": "0:41:02"}
12
+ {"current_steps": 120, "total_steps": 3970, "loss": 0.5849, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.98419419507348e-06, "epoch": 0.3022670025188917, "percentage": 3.02, "elapsed_time": "0:01:16", "remaining_time": "0:40:39"}
13
+ {"current_steps": 130, "total_steps": 3970, "loss": 0.5495, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.980877093288953e-06, "epoch": 0.327455919395466, "percentage": 3.27, "elapsed_time": "0:01:22", "remaining_time": "0:40:24"}
14
+ {"current_steps": 140, "total_steps": 3970, "loss": 0.5916, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.97724492002649e-06, "epoch": 0.3526448362720403, "percentage": 3.53, "elapsed_time": "0:01:28", "remaining_time": "0:40:10"}
15
+ {"current_steps": 150, "total_steps": 3970, "loss": 0.624, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.973297905043662e-06, "epoch": 0.3778337531486146, "percentage": 3.78, "elapsed_time": "0:01:34", "remaining_time": "0:40:07"}
16
+ {"current_steps": 160, "total_steps": 3970, "loss": 0.5867, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.969036298013732e-06, "epoch": 0.40302267002518893, "percentage": 4.03, "elapsed_time": "0:01:40", "remaining_time": "0:40:03"}
17
+ {"current_steps": 170, "total_steps": 3970, "loss": 0.5561, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.964460368509868e-06, "epoch": 0.4282115869017632, "percentage": 4.28, "elapsed_time": "0:01:47", "remaining_time": "0:39:55"}
18
+ {"current_steps": 180, "total_steps": 3970, "loss": 0.577, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.959570405988096e-06, "epoch": 0.4534005037783375, "percentage": 4.53, "elapsed_time": "0:01:53", "remaining_time": "0:39:45"}
19
+ {"current_steps": 190, "total_steps": 3970, "loss": 0.5833, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.954366719768975e-06, "epoch": 0.47858942065491183, "percentage": 4.79, "elapsed_time": "0:01:59", "remaining_time": "0:39:33"}
20
+ {"current_steps": 200, "total_steps": 3970, "loss": 0.5398, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.948849639018055e-06, "epoch": 0.5037783375314862, "percentage": 5.04, "elapsed_time": "0:02:05", "remaining_time": "0:39:26"}
21
+ {"current_steps": 200, "total_steps": 3970, "loss": null, "eval_loss": 0.5354337692260742, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 0.5037783375314862, "percentage": 5.04, "elapsed_time": "0:02:05", "remaining_time": "0:39:26"}
22
+ {"current_steps": 210, "total_steps": 3970, "loss": 0.5574, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.943019512725026e-06, "epoch": 0.5289672544080605, "percentage": 5.29, "elapsed_time": "0:02:25", "remaining_time": "0:43:20"}
23
+ {"current_steps": 220, "total_steps": 3970, "loss": 0.5173, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.936876709681668e-06, "epoch": 0.5541561712846348, "percentage": 5.54, "elapsed_time": "0:02:31", "remaining_time": "0:42:59"}
24
+ {"current_steps": 230, "total_steps": 3970, "loss": 0.5581, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.930421618458506e-06, "epoch": 0.5793450881612091, "percentage": 5.79, "elapsed_time": "0:02:37", "remaining_time": "0:42:46"}
25
+ {"current_steps": 240, "total_steps": 3970, "loss": 0.5654, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.923654647380236e-06, "epoch": 0.6045340050377834, "percentage": 6.05, "elapsed_time": "0:02:43", "remaining_time": "0:42:24"}
26
+ {"current_steps": 250, "total_steps": 3970, "loss": 0.5542, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.916576224499898e-06, "epoch": 0.6297229219143576, "percentage": 6.3, "elapsed_time": "0:02:50", "remaining_time": "0:42:10"}
27
+ {"current_steps": 260, "total_steps": 3970, "loss": 0.5195, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.9091867975718e-06, "epoch": 0.654911838790932, "percentage": 6.55, "elapsed_time": "0:02:56", "remaining_time": "0:41:55"}
28
+ {"current_steps": 270, "total_steps": 3970, "loss": 0.5479, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.901486834023182e-06, "epoch": 0.6801007556675063, "percentage": 6.8, "elapsed_time": "0:03:02", "remaining_time": "0:41:36"}
29
+ {"current_steps": 280, "total_steps": 3970, "loss": 0.5274, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.893476820924668e-06, "epoch": 0.7052896725440806, "percentage": 7.05, "elapsed_time": "0:03:08", "remaining_time": "0:41:24"}
30
+ {"current_steps": 290, "total_steps": 3970, "loss": 0.5228, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.885157264959442e-06, "epoch": 0.7304785894206549, "percentage": 7.3, "elapsed_time": "0:03:15", "remaining_time": "0:41:16"}
31
+ {"current_steps": 300, "total_steps": 3970, "loss": 0.5234, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.8765286923912e-06, "epoch": 0.7556675062972292, "percentage": 7.56, "elapsed_time": "0:03:21", "remaining_time": "0:41:04"}
32
+ {"current_steps": 310, "total_steps": 3970, "loss": 0.5475, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.867591649030863e-06, "epoch": 0.7808564231738035, "percentage": 7.81, "elapsed_time": "0:03:27", "remaining_time": "0:40:52"}
33
+ {"current_steps": 320, "total_steps": 3970, "loss": 0.5108, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.85834670020205e-06, "epoch": 0.8060453400503779, "percentage": 8.06, "elapsed_time": "0:03:34", "remaining_time": "0:40:45"}
34
+ {"current_steps": 330, "total_steps": 3970, "loss": 0.5085, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.848794430705317e-06, "epoch": 0.8312342569269522, "percentage": 8.31, "elapsed_time": "0:03:41", "remaining_time": "0:40:39"}
35
+ {"current_steps": 340, "total_steps": 3970, "loss": 0.5449, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.838935444781162e-06, "epoch": 0.8564231738035264, "percentage": 8.56, "elapsed_time": "0:03:48", "remaining_time": "0:40:37"}
36
+ {"current_steps": 350, "total_steps": 3970, "loss": 0.5203, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.82877036607181e-06, "epoch": 0.8816120906801007, "percentage": 8.82, "elapsed_time": "0:03:55", "remaining_time": "0:40:30"}
37
+ {"current_steps": 360, "total_steps": 3970, "loss": 0.5225, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.818299837581758e-06, "epoch": 0.906801007556675, "percentage": 9.07, "elapsed_time": "0:04:02", "remaining_time": "0:40:28"}
38
+ {"current_steps": 370, "total_steps": 3970, "loss": 0.5323, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.807524521637103e-06, "epoch": 0.9319899244332494, "percentage": 9.32, "elapsed_time": "0:04:08", "remaining_time": "0:40:20"}
39
+ {"current_steps": 380, "total_steps": 3970, "loss": 0.5494, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.796445099843648e-06, "epoch": 0.9571788413098237, "percentage": 9.57, "elapsed_time": "0:04:15", "remaining_time": "0:40:09"}
40
+ {"current_steps": 390, "total_steps": 3970, "loss": 0.5503, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.785062273043778e-06, "epoch": 0.982367758186398, "percentage": 9.82, "elapsed_time": "0:04:20", "remaining_time": "0:39:55"}
41
+ {"current_steps": 400, "total_steps": 3970, "loss": 0.5337, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.77337676127214e-06, "epoch": 1.0075566750629723, "percentage": 10.08, "elapsed_time": "0:04:27", "remaining_time": "0:39:45"}
42
+ {"current_steps": 400, "total_steps": 3970, "loss": null, "eval_loss": 0.5050087571144104, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.0075566750629723, "percentage": 10.08, "elapsed_time": "0:04:27", "remaining_time": "0:39:45"}
43
+ {"current_steps": 410, "total_steps": 3970, "loss": 0.5138, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.761389303710085e-06, "epoch": 1.0327455919395465, "percentage": 10.33, "elapsed_time": "0:04:47", "remaining_time": "0:41:33"}
44
+ {"current_steps": 420, "total_steps": 3970, "loss": 0.5152, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.749100658638914e-06, "epoch": 1.057934508816121, "percentage": 10.58, "elapsed_time": "0:04:53", "remaining_time": "0:41:20"}
45
+ {"current_steps": 430, "total_steps": 3970, "loss": 0.5208, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.736511603391917e-06, "epoch": 1.0831234256926952, "percentage": 10.83, "elapsed_time": "0:04:59", "remaining_time": "0:41:07"}
46
+ {"current_steps": 440, "total_steps": 3970, "loss": 0.5271, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.723622934305193e-06, "epoch": 1.1083123425692696, "percentage": 11.08, "elapsed_time": "0:05:05", "remaining_time": "0:40:53"}
47
+ {"current_steps": 450, "total_steps": 3970, "loss": 0.4956, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.710435466667281e-06, "epoch": 1.1335012594458438, "percentage": 11.34, "elapsed_time": "0:05:12", "remaining_time": "0:40:43"}
48
+ {"current_steps": 460, "total_steps": 3970, "loss": 0.514, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.696950034667595e-06, "epoch": 1.1586901763224182, "percentage": 11.59, "elapsed_time": "0:05:18", "remaining_time": "0:40:29"}
49
+ {"current_steps": 470, "total_steps": 3970, "loss": 0.4965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.68316749134364e-06, "epoch": 1.1838790931989924, "percentage": 11.84, "elapsed_time": "0:05:24", "remaining_time": "0:40:19"}
50
+ {"current_steps": 480, "total_steps": 3970, "loss": 0.5264, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.669088708527068e-06, "epoch": 1.2090680100755669, "percentage": 12.09, "elapsed_time": "0:05:31", "remaining_time": "0:40:10"}
51
+ {"current_steps": 490, "total_steps": 3970, "loss": 0.478, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.654714576788521e-06, "epoch": 1.234256926952141, "percentage": 12.34, "elapsed_time": "0:05:37", "remaining_time": "0:39:58"}
52
+ {"current_steps": 500, "total_steps": 3970, "loss": 0.5274, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.640046005381299e-06, "epoch": 1.2594458438287153, "percentage": 12.59, "elapsed_time": "0:05:43", "remaining_time": "0:39:45"}
53
+ {"current_steps": 510, "total_steps": 3970, "loss": 0.4912, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.62508392218384e-06, "epoch": 1.2846347607052897, "percentage": 12.85, "elapsed_time": "0:05:50", "remaining_time": "0:39:36"}
54
+ {"current_steps": 520, "total_steps": 3970, "loss": 0.4978, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.609829273641034e-06, "epoch": 1.309823677581864, "percentage": 13.1, "elapsed_time": "0:05:58", "remaining_time": "0:39:40"}
55
+ {"current_steps": 530, "total_steps": 3970, "loss": 0.5496, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.59428302470435e-06, "epoch": 1.3350125944584383, "percentage": 13.35, "elapsed_time": "0:06:11", "remaining_time": "0:40:09"}
56
+ {"current_steps": 540, "total_steps": 3970, "loss": 0.4816, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.578446158770795e-06, "epoch": 1.3602015113350125, "percentage": 13.6, "elapsed_time": "0:06:23", "remaining_time": "0:40:35"}
57
+ {"current_steps": 550, "total_steps": 3970, "loss": 0.5116, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.56231967762071e-06, "epoch": 1.385390428211587, "percentage": 13.85, "elapsed_time": "0:06:33", "remaining_time": "0:40:46"}
58
+ {"current_steps": 560, "total_steps": 3970, "loss": 0.5327, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.545904601354402e-06, "epoch": 1.4105793450881612, "percentage": 14.11, "elapsed_time": "0:06:48", "remaining_time": "0:41:25"}
59
+ {"current_steps": 570, "total_steps": 3970, "loss": 0.4983, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.529201968327618e-06, "epoch": 1.4357682619647356, "percentage": 14.36, "elapsed_time": "0:06:55", "remaining_time": "0:41:20"}
60
+ {"current_steps": 580, "total_steps": 3970, "loss": 0.546, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.51221283508585e-06, "epoch": 1.4609571788413098, "percentage": 14.61, "elapsed_time": "0:07:04", "remaining_time": "0:41:18"}
61
+ {"current_steps": 590, "total_steps": 3970, "loss": 0.5256, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.494938276297523e-06, "epoch": 1.486146095717884, "percentage": 14.86, "elapsed_time": "0:07:15", "remaining_time": "0:41:35"}
62
+ {"current_steps": 600, "total_steps": 3970, "loss": 0.5133, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.477379384686e-06, "epoch": 1.5113350125944585, "percentage": 15.11, "elapsed_time": "0:07:26", "remaining_time": "0:41:47"}
63
+ {"current_steps": 600, "total_steps": 3970, "loss": null, "eval_loss": 0.4898512363433838, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 1.5113350125944585, "percentage": 15.11, "elapsed_time": "0:07:26", "remaining_time": "0:41:47"}
64
+ {"current_steps": 610, "total_steps": 3970, "loss": 0.4974, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.459537270960464e-06, "epoch": 1.536523929471033, "percentage": 15.37, "elapsed_time": "0:07:59", "remaining_time": "0:44:03"}
65
+ {"current_steps": 620, "total_steps": 3970, "loss": 0.5036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.44141306374566e-06, "epoch": 1.561712846347607, "percentage": 15.62, "elapsed_time": "0:08:08", "remaining_time": "0:44:00"}
66
+ {"current_steps": 630, "total_steps": 3970, "loss": 0.5056, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.423007909510504e-06, "epoch": 1.5869017632241813, "percentage": 15.87, "elapsed_time": "0:08:16", "remaining_time": "0:43:51"}
67
+ {"current_steps": 640, "total_steps": 3970, "loss": 0.5134, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.404322972495555e-06, "epoch": 1.6120906801007555, "percentage": 16.12, "elapsed_time": "0:08:28", "remaining_time": "0:44:05"}
68
+ {"current_steps": 650, "total_steps": 3970, "loss": 0.4734, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.385359434639381e-06, "epoch": 1.63727959697733, "percentage": 16.37, "elapsed_time": "0:08:40", "remaining_time": "0:44:17"}
69
+ {"current_steps": 660, "total_steps": 3970, "loss": 0.4851, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.36611849550378e-06, "epoch": 1.6624685138539044, "percentage": 16.62, "elapsed_time": "0:08:49", "remaining_time": "0:44:13"}
70
+ {"current_steps": 670, "total_steps": 3970, "loss": 0.4863, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.346601372197914e-06, "epoch": 1.6876574307304786, "percentage": 16.88, "elapsed_time": "0:08:58", "remaining_time": "0:44:14"}
71
+ {"current_steps": 680, "total_steps": 3970, "loss": 0.4972, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.326809299301308e-06, "epoch": 1.7128463476070528, "percentage": 17.13, "elapsed_time": "0:09:10", "remaining_time": "0:44:21"}
72
+ {"current_steps": 690, "total_steps": 3970, "loss": 0.4876, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.306743528785762e-06, "epoch": 1.7380352644836272, "percentage": 17.38, "elapsed_time": "0:09:18", "remaining_time": "0:44:14"}
73
+ {"current_steps": 700, "total_steps": 3970, "loss": 0.5068, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.286405329936153e-06, "epoch": 1.7632241813602016, "percentage": 17.63, "elapsed_time": "0:09:31", "remaining_time": "0:44:28"}
74
+ {"current_steps": 710, "total_steps": 3970, "loss": 0.4894, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.265795989270148e-06, "epoch": 1.7884130982367759, "percentage": 17.88, "elapsed_time": "0:09:44", "remaining_time": "0:44:45"}
75
+ {"current_steps": 720, "total_steps": 3970, "loss": 0.4776, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.244916810456822e-06, "epoch": 1.81360201511335, "percentage": 18.14, "elapsed_time": "0:10:03", "remaining_time": "0:45:23"}
76
+ {"current_steps": 730, "total_steps": 3970, "loss": 0.4689, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.223769114234185e-06, "epoch": 1.8387909319899243, "percentage": 18.39, "elapsed_time": "0:10:14", "remaining_time": "0:45:25"}
77
+ {"current_steps": 740, "total_steps": 3970, "loss": 0.4869, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.202354238325652e-06, "epoch": 1.8639798488664987, "percentage": 18.64, "elapsed_time": "0:10:23", "remaining_time": "0:45:22"}
78
+ {"current_steps": 750, "total_steps": 3970, "loss": 0.4767, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.180673537355414e-06, "epoch": 1.8891687657430731, "percentage": 18.89, "elapsed_time": "0:10:41", "remaining_time": "0:45:56"}
79
+ {"current_steps": 760, "total_steps": 3970, "loss": 0.5064, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.158728382762753e-06, "epoch": 1.9143576826196473, "percentage": 19.14, "elapsed_time": "0:10:49", "remaining_time": "0:45:45"}
80
+ {"current_steps": 770, "total_steps": 3970, "loss": 0.5379, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.136520162715288e-06, "epoch": 1.9395465994962215, "percentage": 19.4, "elapsed_time": "0:11:03", "remaining_time": "0:45:59"}
81
+ {"current_steps": 780, "total_steps": 3970, "loss": 0.4965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.11405028202116e-06, "epoch": 1.964735516372796, "percentage": 19.65, "elapsed_time": "0:11:17", "remaining_time": "0:46:10"}
82
+ {"current_steps": 790, "total_steps": 3970, "loss": 0.4806, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.091320162040183e-06, "epoch": 1.9899244332493704, "percentage": 19.9, "elapsed_time": "0:11:27", "remaining_time": "0:46:07"}
83
+ {"current_steps": 800, "total_steps": 3970, "loss": 0.4912, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.068331240593919e-06, "epoch": 2.0151133501259446, "percentage": 20.15, "elapsed_time": "0:11:36", "remaining_time": "0:45:58"}
84
+ {"current_steps": 800, "total_steps": 3970, "loss": null, "eval_loss": 0.47740957140922546, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.0151133501259446, "percentage": 20.15, "elapsed_time": "0:11:36", "remaining_time": "0:45:58"}
85
+ {"current_steps": 810, "total_steps": 3970, "loss": 0.5086, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.045084971874738e-06, "epoch": 2.040302267002519, "percentage": 20.4, "elapsed_time": "0:12:13", "remaining_time": "0:47:43"}
86
+ {"current_steps": 820, "total_steps": 3970, "loss": 0.4363, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 9.021582826353825e-06, "epoch": 2.065491183879093, "percentage": 20.65, "elapsed_time": "0:12:20", "remaining_time": "0:47:23"}
87
+ {"current_steps": 830, "total_steps": 3970, "loss": 0.486, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.997826290688165e-06, "epoch": 2.0906801007556677, "percentage": 20.91, "elapsed_time": "0:12:26", "remaining_time": "0:47:03"}
88
+ {"current_steps": 840, "total_steps": 3970, "loss": 0.5239, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.973816867626503e-06, "epoch": 2.115869017632242, "percentage": 21.16, "elapsed_time": "0:12:32", "remaining_time": "0:46:44"}
89
+ {"current_steps": 850, "total_steps": 3970, "loss": 0.4903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.949556075914286e-06, "epoch": 2.141057934508816, "percentage": 21.41, "elapsed_time": "0:12:38", "remaining_time": "0:46:25"}
90
+ {"current_steps": 860, "total_steps": 3970, "loss": 0.4497, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.925045450197593e-06, "epoch": 2.1662468513853903, "percentage": 21.66, "elapsed_time": "0:12:44", "remaining_time": "0:46:05"}
91
+ {"current_steps": 870, "total_steps": 3970, "loss": 0.4906, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.900286540926062e-06, "epoch": 2.1914357682619645, "percentage": 21.91, "elapsed_time": "0:12:50", "remaining_time": "0:45:46"}
92
+ {"current_steps": 880, "total_steps": 3970, "loss": 0.5016, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.875280914254803e-06, "epoch": 2.216624685138539, "percentage": 22.17, "elapsed_time": "0:12:57", "remaining_time": "0:45:29"}
93
+ {"current_steps": 890, "total_steps": 3970, "loss": 0.477, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.850030151945343e-06, "epoch": 2.2418136020151134, "percentage": 22.42, "elapsed_time": "0:13:03", "remaining_time": "0:45:12"}
94
+ {"current_steps": 900, "total_steps": 3970, "loss": 0.4967, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.824535851265565e-06, "epoch": 2.2670025188916876, "percentage": 22.67, "elapsed_time": "0:13:09", "remaining_time": "0:44:53"}
95
+ {"current_steps": 910, "total_steps": 3970, "loss": 0.4833, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.798799624888665e-06, "epoch": 2.292191435768262, "percentage": 22.92, "elapsed_time": "0:13:15", "remaining_time": "0:44:35"}
96
+ {"current_steps": 920, "total_steps": 3970, "loss": 0.4745, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.772823100791152e-06, "epoch": 2.3173803526448364, "percentage": 23.17, "elapsed_time": "0:13:21", "remaining_time": "0:44:18"}
97
+ {"current_steps": 930, "total_steps": 3970, "loss": 0.4719, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.746607922149853e-06, "epoch": 2.3425692695214106, "percentage": 23.43, "elapsed_time": "0:13:28", "remaining_time": "0:44:03"}
98
+ {"current_steps": 940, "total_steps": 3970, "loss": 0.4749, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.720155747237985e-06, "epoch": 2.367758186397985, "percentage": 23.68, "elapsed_time": "0:13:34", "remaining_time": "0:43:46"}
99
+ {"current_steps": 950, "total_steps": 3970, "loss": 0.457, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.693468249320257e-06, "epoch": 2.392947103274559, "percentage": 23.93, "elapsed_time": "0:13:40", "remaining_time": "0:43:29"}
100
+ {"current_steps": 960, "total_steps": 3970, "loss": 0.4793, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.666547116547015e-06, "epoch": 2.4181360201511337, "percentage": 24.18, "elapsed_time": "0:13:46", "remaining_time": "0:43:12"}
101
+ {"current_steps": 970, "total_steps": 3970, "loss": 0.4589, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.639394051847472e-06, "epoch": 2.443324937027708, "percentage": 24.43, "elapsed_time": "0:13:53", "remaining_time": "0:42:58"}
102
+ {"current_steps": 980, "total_steps": 3970, "loss": 0.4882, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.612010772821972e-06, "epoch": 2.468513853904282, "percentage": 24.69, "elapsed_time": "0:14:00", "remaining_time": "0:42:42"}
103
+ {"current_steps": 990, "total_steps": 3970, "loss": 0.4775, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.584399011633356e-06, "epoch": 2.4937027707808563, "percentage": 24.94, "elapsed_time": "0:14:06", "remaining_time": "0:42:26"}
104
+ {"current_steps": 1000, "total_steps": 3970, "loss": 0.4573, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.556560514897376e-06, "epoch": 2.5188916876574305, "percentage": 25.19, "elapsed_time": "0:14:12", "remaining_time": "0:42:10"}
105
+ {"current_steps": 1000, "total_steps": 3970, "loss": null, "eval_loss": 0.470566064119339, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 2.5188916876574305, "percentage": 25.19, "elapsed_time": "0:14:12", "remaining_time": "0:42:10"}
106
+ {"current_steps": 1010, "total_steps": 3970, "loss": 0.48, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.528497043572222e-06, "epoch": 2.544080604534005, "percentage": 25.44, "elapsed_time": "0:14:32", "remaining_time": "0:42:35"}
107
+ {"current_steps": 1020, "total_steps": 3970, "loss": 0.4544, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.500210372847128e-06, "epoch": 2.5692695214105794, "percentage": 25.69, "elapsed_time": "0:14:37", "remaining_time": "0:42:19"}
108
+ {"current_steps": 1030, "total_steps": 3970, "loss": 0.4977, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.471702292030078e-06, "epoch": 2.5944584382871536, "percentage": 25.94, "elapsed_time": "0:14:43", "remaining_time": "0:42:02"}
109
+ {"current_steps": 1040, "total_steps": 3970, "loss": 0.4467, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.44297460443462e-06, "epoch": 2.619647355163728, "percentage": 26.2, "elapsed_time": "0:14:49", "remaining_time": "0:41:46"}
110
+ {"current_steps": 1050, "total_steps": 3970, "loss": 0.4527, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.414029127265803e-06, "epoch": 2.644836272040302, "percentage": 26.45, "elapsed_time": "0:14:56", "remaining_time": "0:41:32"}
111
+ {"current_steps": 1060, "total_steps": 3970, "loss": 0.468, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.38486769150522e-06, "epoch": 2.6700251889168767, "percentage": 26.7, "elapsed_time": "0:15:02", "remaining_time": "0:41:17"}
112
+ {"current_steps": 1070, "total_steps": 3970, "loss": 0.4817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.355492141795185e-06, "epoch": 2.695214105793451, "percentage": 26.95, "elapsed_time": "0:15:08", "remaining_time": "0:41:03"}
113
+ {"current_steps": 1080, "total_steps": 3970, "loss": 0.4718, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.325904336322057e-06, "epoch": 2.720403022670025, "percentage": 27.2, "elapsed_time": "0:15:15", "remaining_time": "0:40:49"}
114
+ {"current_steps": 1090, "total_steps": 3970, "loss": 0.472, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.296106146698693e-06, "epoch": 2.7455919395465997, "percentage": 27.46, "elapsed_time": "0:15:21", "remaining_time": "0:40:35"}
115
+ {"current_steps": 1100, "total_steps": 3970, "loss": 0.4461, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.266099457846052e-06, "epoch": 2.770780856423174, "percentage": 27.71, "elapsed_time": "0:15:27", "remaining_time": "0:40:20"}
116
+ {"current_steps": 1110, "total_steps": 3970, "loss": 0.4528, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.235886167873975e-06, "epoch": 2.795969773299748, "percentage": 27.96, "elapsed_time": "0:15:33", "remaining_time": "0:40:05"}
117
+ {"current_steps": 1120, "total_steps": 3970, "loss": 0.4638, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.2054681879611e-06, "epoch": 2.8211586901763224, "percentage": 28.21, "elapsed_time": "0:15:40", "remaining_time": "0:39:52"}
118
+ {"current_steps": 1130, "total_steps": 3970, "loss": 0.4538, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.174847442233988e-06, "epoch": 2.8463476070528966, "percentage": 28.46, "elapsed_time": "0:15:46", "remaining_time": "0:39:38"}
119
+ {"current_steps": 1140, "total_steps": 3970, "loss": 0.4563, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.144025867645391e-06, "epoch": 2.8715365239294712, "percentage": 28.72, "elapsed_time": "0:15:52", "remaining_time": "0:39:24"}
120
+ {"current_steps": 1150, "total_steps": 3970, "loss": 0.4484, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.113005413851742e-06, "epoch": 2.8967254408060454, "percentage": 28.97, "elapsed_time": "0:15:58", "remaining_time": "0:39:10"}
121
+ {"current_steps": 1160, "total_steps": 3970, "loss": 0.4687, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.081788043089818e-06, "epoch": 2.9219143576826196, "percentage": 29.22, "elapsed_time": "0:16:04", "remaining_time": "0:38:56"}
122
+ {"current_steps": 1170, "total_steps": 3970, "loss": 0.4463, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.050375730052622e-06, "epoch": 2.947103274559194, "percentage": 29.47, "elapsed_time": "0:16:10", "remaining_time": "0:38:43"}
123
+ {"current_steps": 1180, "total_steps": 3970, "loss": 0.4676, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 8.018770461764471e-06, "epoch": 2.972292191435768, "percentage": 29.72, "elapsed_time": "0:16:17", "remaining_time": "0:38:30"}
124
+ {"current_steps": 1190, "total_steps": 3970, "loss": 0.4539, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.986974237455298e-06, "epoch": 2.9974811083123427, "percentage": 29.97, "elapsed_time": "0:16:23", "remaining_time": "0:38:17"}
125
+ {"current_steps": 1200, "total_steps": 3970, "loss": 0.4628, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.954989068434198e-06, "epoch": 3.022670025188917, "percentage": 30.23, "elapsed_time": "0:16:29", "remaining_time": "0:38:04"}
126
+ {"current_steps": 1200, "total_steps": 3970, "loss": null, "eval_loss": 0.46436572074890137, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.022670025188917, "percentage": 30.23, "elapsed_time": "0:16:29", "remaining_time": "0:38:04"}
127
+ {"current_steps": 1210, "total_steps": 3970, "loss": 0.4232, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.922816977962191e-06, "epoch": 3.047858942065491, "percentage": 30.48, "elapsed_time": "0:16:50", "remaining_time": "0:38:25"}
128
+ {"current_steps": 1220, "total_steps": 3970, "loss": 0.4422, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.890460001124242e-06, "epoch": 3.0730478589420653, "percentage": 30.73, "elapsed_time": "0:16:58", "remaining_time": "0:38:15"}
129
+ {"current_steps": 1230, "total_steps": 3970, "loss": 0.4483, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.857920184700534e-06, "epoch": 3.09823677581864, "percentage": 30.98, "elapsed_time": "0:17:04", "remaining_time": "0:38:02"}
130
+ {"current_steps": 1240, "total_steps": 3970, "loss": 0.4638, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.825199587036989e-06, "epoch": 3.123425692695214, "percentage": 31.23, "elapsed_time": "0:17:10", "remaining_time": "0:37:48"}
131
+ {"current_steps": 1250, "total_steps": 3970, "loss": 0.4417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.792300277915062e-06, "epoch": 3.1486146095717884, "percentage": 31.49, "elapsed_time": "0:17:16", "remaining_time": "0:37:35"}
132
+ {"current_steps": 1260, "total_steps": 3970, "loss": 0.4677, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.759224338420832e-06, "epoch": 3.1738035264483626, "percentage": 31.74, "elapsed_time": "0:17:22", "remaining_time": "0:37:21"}
133
+ {"current_steps": 1270, "total_steps": 3970, "loss": 0.4304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.725973860813338e-06, "epoch": 3.1989924433249373, "percentage": 31.99, "elapsed_time": "0:17:28", "remaining_time": "0:37:08"}
134
+ {"current_steps": 1280, "total_steps": 3970, "loss": 0.4818, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.692550948392248e-06, "epoch": 3.2241813602015115, "percentage": 32.24, "elapsed_time": "0:17:34", "remaining_time": "0:36:55"}
135
+ {"current_steps": 1290, "total_steps": 3970, "loss": 0.4483, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.658957715364801e-06, "epoch": 3.2493702770780857, "percentage": 32.49, "elapsed_time": "0:17:39", "remaining_time": "0:36:42"}
136
+ {"current_steps": 1300, "total_steps": 3970, "loss": 0.4417, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.625196286712076e-06, "epoch": 3.27455919395466, "percentage": 32.75, "elapsed_time": "0:17:46", "remaining_time": "0:36:29"}
137
+ {"current_steps": 1310, "total_steps": 3970, "loss": 0.4722, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.591268798054569e-06, "epoch": 3.299748110831234, "percentage": 33.0, "elapsed_time": "0:17:52", "remaining_time": "0:36:17"}
138
+ {"current_steps": 1320, "total_steps": 3970, "loss": 0.4739, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.5571773955171124e-06, "epoch": 3.3249370277078087, "percentage": 33.25, "elapsed_time": "0:17:58", "remaining_time": "0:36:05"}
139
+ {"current_steps": 1330, "total_steps": 3970, "loss": 0.4062, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.522924235593103e-06, "epoch": 3.350125944584383, "percentage": 33.5, "elapsed_time": "0:18:05", "remaining_time": "0:35:53"}
140
+ {"current_steps": 1340, "total_steps": 3970, "loss": 0.4644, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.488511485008106e-06, "epoch": 3.375314861460957, "percentage": 33.75, "elapsed_time": "0:18:11", "remaining_time": "0:35:41"}
141
+ {"current_steps": 1350, "total_steps": 3970, "loss": 0.451, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.453941320582785e-06, "epoch": 3.4005037783375314, "percentage": 34.01, "elapsed_time": "0:18:18", "remaining_time": "0:35:31"}
142
+ {"current_steps": 1360, "total_steps": 3970, "loss": 0.4254, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.419215929095211e-06, "epoch": 3.4256926952141056, "percentage": 34.26, "elapsed_time": "0:18:24", "remaining_time": "0:35:18"}
143
+ {"current_steps": 1370, "total_steps": 3970, "loss": 0.4723, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.3843375071425315e-06, "epoch": 3.4508816120906802, "percentage": 34.51, "elapsed_time": "0:18:30", "remaining_time": "0:35:07"}
144
+ {"current_steps": 1380, "total_steps": 3970, "loss": 0.4442, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.349308261002023e-06, "epoch": 3.4760705289672544, "percentage": 34.76, "elapsed_time": "0:18:36", "remaining_time": "0:34:55"}
145
+ {"current_steps": 1390, "total_steps": 3970, "loss": 0.441, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.314130406491533e-06, "epoch": 3.5012594458438286, "percentage": 35.01, "elapsed_time": "0:18:43", "remaining_time": "0:34:44"}
146
+ {"current_steps": 1400, "total_steps": 3970, "loss": 0.4429, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.278806168829313e-06, "epoch": 3.5264483627204033, "percentage": 35.26, "elapsed_time": "0:18:49", "remaining_time": "0:34:33"}
147
+ {"current_steps": 1400, "total_steps": 3970, "loss": null, "eval_loss": 0.45942774415016174, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 3.5264483627204033, "percentage": 35.26, "elapsed_time": "0:18:49", "remaining_time": "0:34:33"}
148
+ {"current_steps": 1410, "total_steps": 3970, "loss": 0.4372, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.243337782493255e-06, "epoch": 3.551637279596977, "percentage": 35.52, "elapsed_time": "0:19:09", "remaining_time": "0:34:47"}
149
+ {"current_steps": 1420, "total_steps": 3970, "loss": 0.4204, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.2077274910795605e-06, "epoch": 3.5768261964735517, "percentage": 35.77, "elapsed_time": "0:19:16", "remaining_time": "0:34:36"}
150
+ {"current_steps": 1430, "total_steps": 3970, "loss": 0.4368, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.1719775471608025e-06, "epoch": 3.602015113350126, "percentage": 36.02, "elapsed_time": "0:19:22", "remaining_time": "0:34:25"}
151
+ {"current_steps": 1440, "total_steps": 3970, "loss": 0.4321, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.136090212143447e-06, "epoch": 3.6272040302267, "percentage": 36.27, "elapsed_time": "0:19:29", "remaining_time": "0:34:14"}
152
+ {"current_steps": 1450, "total_steps": 3970, "loss": 0.4576, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.100067756124803e-06, "epoch": 3.652392947103275, "percentage": 36.52, "elapsed_time": "0:19:36", "remaining_time": "0:34:04"}
153
+ {"current_steps": 1460, "total_steps": 3970, "loss": 0.443, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.063912457749426e-06, "epoch": 3.677581863979849, "percentage": 36.78, "elapsed_time": "0:19:42", "remaining_time": "0:33:53"}
154
+ {"current_steps": 1470, "total_steps": 3970, "loss": 0.4369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 7.02762660406497e-06, "epoch": 3.702770780856423, "percentage": 37.03, "elapsed_time": "0:19:49", "remaining_time": "0:33:43"}
155
+ {"current_steps": 1480, "total_steps": 3970, "loss": 0.4246, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.991212490377532e-06, "epoch": 3.7279596977329974, "percentage": 37.28, "elapsed_time": "0:19:56", "remaining_time": "0:33:32"}
156
+ {"current_steps": 1490, "total_steps": 3970, "loss": 0.4307, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.954672420106452e-06, "epoch": 3.7531486146095716, "percentage": 37.53, "elapsed_time": "0:20:02", "remaining_time": "0:33:21"}
157
+ {"current_steps": 1500, "total_steps": 3970, "loss": 0.4304, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.918008704638603e-06, "epoch": 3.7783375314861463, "percentage": 37.78, "elapsed_time": "0:20:09", "remaining_time": "0:33:11"}
158
+ {"current_steps": 1510, "total_steps": 3970, "loss": 0.4817, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.8812236631821886e-06, "epoch": 3.8035264483627205, "percentage": 38.04, "elapsed_time": "0:20:15", "remaining_time": "0:33:00"}
159
+ {"current_steps": 1520, "total_steps": 3970, "loss": 0.4646, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.844319622620039e-06, "epoch": 3.8287153652392947, "percentage": 38.29, "elapsed_time": "0:20:22", "remaining_time": "0:32:50"}
160
+ {"current_steps": 1530, "total_steps": 3970, "loss": 0.449, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.807298917362417e-06, "epoch": 3.853904282115869, "percentage": 38.54, "elapsed_time": "0:20:28", "remaining_time": "0:32:39"}
161
+ {"current_steps": 1540, "total_steps": 3970, "loss": 0.4498, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.7701638891993515e-06, "epoch": 3.879093198992443, "percentage": 38.79, "elapsed_time": "0:20:34", "remaining_time": "0:32:28"}
162
+ {"current_steps": 1550, "total_steps": 3970, "loss": 0.4683, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.732916887152508e-06, "epoch": 3.9042821158690177, "percentage": 39.04, "elapsed_time": "0:20:41", "remaining_time": "0:32:17"}
163
+ {"current_steps": 1560, "total_steps": 3970, "loss": 0.4709, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.695560267326599e-06, "epoch": 3.929471032745592, "percentage": 39.29, "elapsed_time": "0:20:47", "remaining_time": "0:32:07"}
164
+ {"current_steps": 1570, "total_steps": 3970, "loss": 0.4369, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.65809639276034e-06, "epoch": 3.954659949622166, "percentage": 39.55, "elapsed_time": "0:20:54", "remaining_time": "0:31:58"}
165
+ {"current_steps": 1580, "total_steps": 3970, "loss": 0.4293, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.62052763327698e-06, "epoch": 3.979848866498741, "percentage": 39.8, "elapsed_time": "0:21:01", "remaining_time": "0:31:48"}
166
+ {"current_steps": 1590, "total_steps": 3970, "loss": 0.4282, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.582856365334389e-06, "epoch": 4.005037783375315, "percentage": 40.05, "elapsed_time": "0:21:08", "remaining_time": "0:31:38"}
167
+ {"current_steps": 1600, "total_steps": 3970, "loss": 0.4058, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.545084971874738e-06, "epoch": 4.030226700251889, "percentage": 40.3, "elapsed_time": "0:21:14", "remaining_time": "0:31:28"}
168
+ {"current_steps": 1600, "total_steps": 3970, "loss": null, "eval_loss": 0.45875710248947144, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.030226700251889, "percentage": 40.3, "elapsed_time": "0:21:14", "remaining_time": "0:31:28"}
169
+ {"current_steps": 1610, "total_steps": 3970, "loss": 0.4402, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.507215842173758e-06, "epoch": 4.055415617128464, "percentage": 40.55, "elapsed_time": "0:21:35", "remaining_time": "0:31:38"}
170
+ {"current_steps": 1620, "total_steps": 3970, "loss": 0.4431, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.469251371689606e-06, "epoch": 4.080604534005038, "percentage": 40.81, "elapsed_time": "0:21:41", "remaining_time": "0:31:28"}
171
+ {"current_steps": 1630, "total_steps": 3970, "loss": 0.4348, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.431193961911336e-06, "epoch": 4.105793450881612, "percentage": 41.06, "elapsed_time": "0:21:48", "remaining_time": "0:31:18"}
172
+ {"current_steps": 1640, "total_steps": 3970, "loss": 0.4343, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.393046020206995e-06, "epoch": 4.130982367758186, "percentage": 41.31, "elapsed_time": "0:21:54", "remaining_time": "0:31:07"}
173
+ {"current_steps": 1650, "total_steps": 3970, "loss": 0.4527, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.354809959671331e-06, "epoch": 4.156171284634761, "percentage": 41.56, "elapsed_time": "0:22:01", "remaining_time": "0:30:57"}
174
+ {"current_steps": 1660, "total_steps": 3970, "loss": 0.4277, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.316488198973162e-06, "epoch": 4.181360201511335, "percentage": 41.81, "elapsed_time": "0:22:07", "remaining_time": "0:30:47"}
175
+ {"current_steps": 1670, "total_steps": 3970, "loss": 0.4261, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.278083162202374e-06, "epoch": 4.206549118387909, "percentage": 42.07, "elapsed_time": "0:22:14", "remaining_time": "0:30:38"}
176
+ {"current_steps": 1680, "total_steps": 3970, "loss": 0.4353, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.239597278716581e-06, "epoch": 4.231738035264484, "percentage": 42.32, "elapsed_time": "0:22:21", "remaining_time": "0:30:28"}
177
+ {"current_steps": 1690, "total_steps": 3970, "loss": 0.4406, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.201032982987456e-06, "epoch": 4.2569269521410575, "percentage": 42.57, "elapsed_time": "0:22:27", "remaining_time": "0:30:18"}
178
+ {"current_steps": 1700, "total_steps": 3970, "loss": 0.4265, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.162392714446732e-06, "epoch": 4.282115869017632, "percentage": 42.82, "elapsed_time": "0:22:34", "remaining_time": "0:30:08"}
179
+ {"current_steps": 1710, "total_steps": 3970, "loss": 0.4273, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.123678917331902e-06, "epoch": 4.307304785894207, "percentage": 43.07, "elapsed_time": "0:22:41", "remaining_time": "0:29:59"}
180
+ {"current_steps": 1720, "total_steps": 3970, "loss": 0.4205, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.084894040531591e-06, "epoch": 4.332493702770781, "percentage": 43.32, "elapsed_time": "0:22:47", "remaining_time": "0:29:49"}
181
+ {"current_steps": 1730, "total_steps": 3970, "loss": 0.4637, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.046040537430662e-06, "epoch": 4.357682619647355, "percentage": 43.58, "elapsed_time": "0:22:54", "remaining_time": "0:29:39"}
182
+ {"current_steps": 1740, "total_steps": 3970, "loss": 0.4153, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 6.007120865755013e-06, "epoch": 4.382871536523929, "percentage": 43.83, "elapsed_time": "0:23:00", "remaining_time": "0:29:29"}
183
+ {"current_steps": 1750, "total_steps": 3970, "loss": 0.4333, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.968137487416123e-06, "epoch": 4.408060453400504, "percentage": 44.08, "elapsed_time": "0:23:07", "remaining_time": "0:29:20"}
184
+ {"current_steps": 1760, "total_steps": 3970, "loss": 0.3902, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.9290928683553105e-06, "epoch": 4.433249370277078, "percentage": 44.33, "elapsed_time": "0:23:14", "remaining_time": "0:29:10"}
185
+ {"current_steps": 1770, "total_steps": 3970, "loss": 0.4325, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.8899894783877536e-06, "epoch": 4.458438287153652, "percentage": 44.58, "elapsed_time": "0:23:21", "remaining_time": "0:29:01"}
186
+ {"current_steps": 1780, "total_steps": 3970, "loss": 0.4076, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.8508297910462464e-06, "epoch": 4.483627204030227, "percentage": 44.84, "elapsed_time": "0:23:27", "remaining_time": "0:28:51"}
187
+ {"current_steps": 1790, "total_steps": 3970, "loss": 0.4301, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.811616283424756e-06, "epoch": 4.508816120906801, "percentage": 45.09, "elapsed_time": "0:23:34", "remaining_time": "0:28:42"}
188
+ {"current_steps": 1800, "total_steps": 3970, "loss": 0.4365, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.772351436021706e-06, "epoch": 4.534005037783375, "percentage": 45.34, "elapsed_time": "0:23:40", "remaining_time": "0:28:32"}
189
+ {"current_steps": 1800, "total_steps": 3970, "loss": null, "eval_loss": 0.4558510482311249, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 4.534005037783375, "percentage": 45.34, "elapsed_time": "0:23:40", "remaining_time": "0:28:32"}
190
+ {"current_steps": 1810, "total_steps": 3970, "loss": 0.43, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.733037732583091e-06, "epoch": 4.55919395465995, "percentage": 45.59, "elapsed_time": "0:24:00", "remaining_time": "0:28:39"}
191
+ {"current_steps": 1820, "total_steps": 3970, "loss": 0.4092, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.693677659945343e-06, "epoch": 4.584382871536524, "percentage": 45.84, "elapsed_time": "0:24:07", "remaining_time": "0:28:29"}
192
+ {"current_steps": 1830, "total_steps": 3970, "loss": 0.4115, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.654273707878042e-06, "epoch": 4.609571788413098, "percentage": 46.1, "elapsed_time": "0:24:13", "remaining_time": "0:28:20"}
193
+ {"current_steps": 1840, "total_steps": 3970, "loss": 0.4145, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.614828368926411e-06, "epoch": 4.634760705289673, "percentage": 46.35, "elapsed_time": "0:24:20", "remaining_time": "0:28:10"}
194
+ {"current_steps": 1850, "total_steps": 3970, "loss": 0.4281, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.575344138253656e-06, "epoch": 4.659949622166247, "percentage": 46.6, "elapsed_time": "0:24:26", "remaining_time": "0:28:00"}
195
+ {"current_steps": 1860, "total_steps": 3970, "loss": 0.4359, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.535823513483123e-06, "epoch": 4.685138539042821, "percentage": 46.85, "elapsed_time": "0:24:32", "remaining_time": "0:27:50"}
196
+ {"current_steps": 1870, "total_steps": 3970, "loss": 0.4202, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.496268994540309e-06, "epoch": 4.710327455919396, "percentage": 47.1, "elapsed_time": "0:24:39", "remaining_time": "0:27:41"}
197
+ {"current_steps": 1880, "total_steps": 3970, "loss": 0.4154, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.456683083494731e-06, "epoch": 4.73551637279597, "percentage": 47.36, "elapsed_time": "0:24:45", "remaining_time": "0:27:31"}
198
+ {"current_steps": 1890, "total_steps": 3970, "loss": 0.4411, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.417068284401655e-06, "epoch": 4.760705289672544, "percentage": 47.61, "elapsed_time": "0:24:51", "remaining_time": "0:27:21"}
199
+ {"current_steps": 1900, "total_steps": 3970, "loss": 0.409, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.37742710314369e-06, "epoch": 4.785894206549118, "percentage": 47.86, "elapsed_time": "0:24:57", "remaining_time": "0:27:12"}
200
+ {"current_steps": 1910, "total_steps": 3970, "loss": 0.4551, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.337762047272282e-06, "epoch": 4.811083123425693, "percentage": 48.11, "elapsed_time": "0:25:04", "remaining_time": "0:27:02"}
201
+ {"current_steps": 1920, "total_steps": 3970, "loss": 0.4151, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.2980756258491e-06, "epoch": 4.836272040302267, "percentage": 48.36, "elapsed_time": "0:25:10", "remaining_time": "0:26:52"}
202
+ {"current_steps": 1930, "total_steps": 3970, "loss": 0.4105, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.258370349287312e-06, "epoch": 4.861460957178841, "percentage": 48.61, "elapsed_time": "0:25:16", "remaining_time": "0:26:43"}
203
+ {"current_steps": 1940, "total_steps": 3970, "loss": 0.4297, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.2186487291927935e-06, "epoch": 4.886649874055416, "percentage": 48.87, "elapsed_time": "0:25:23", "remaining_time": "0:26:34"}
204
+ {"current_steps": 1950, "total_steps": 3970, "loss": 0.4639, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.178913278205248e-06, "epoch": 4.91183879093199, "percentage": 49.12, "elapsed_time": "0:25:30", "remaining_time": "0:26:25"}
205
+ {"current_steps": 1960, "total_steps": 3970, "loss": 0.4213, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.139166509839271e-06, "epoch": 4.937027707808564, "percentage": 49.37, "elapsed_time": "0:25:37", "remaining_time": "0:26:16"}
206
+ {"current_steps": 1970, "total_steps": 3970, "loss": 0.4294, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.099410938325351e-06, "epoch": 4.962216624685139, "percentage": 49.62, "elapsed_time": "0:25:43", "remaining_time": "0:26:07"}
207
+ {"current_steps": 1980, "total_steps": 3970, "loss": 0.4139, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.059649078450834e-06, "epoch": 4.987405541561713, "percentage": 49.87, "elapsed_time": "0:25:50", "remaining_time": "0:25:57"}
208
+ {"current_steps": 1990, "total_steps": 3970, "loss": 0.397, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 5.019883445400838e-06, "epoch": 5.012594458438287, "percentage": 50.13, "elapsed_time": "0:25:56", "remaining_time": "0:25:48"}
209
+ {"current_steps": 2000, "total_steps": 3970, "loss": 0.4189, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.980116554599164e-06, "epoch": 5.037783375314861, "percentage": 50.38, "elapsed_time": "0:26:02", "remaining_time": "0:25:39"}
210
+ {"current_steps": 2000, "total_steps": 3970, "loss": null, "eval_loss": 0.4556237757205963, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.037783375314861, "percentage": 50.38, "elapsed_time": "0:26:02", "remaining_time": "0:25:39"}
211
+ {"current_steps": 2010, "total_steps": 3970, "loss": 0.397, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.940350921549167e-06, "epoch": 5.062972292191436, "percentage": 50.63, "elapsed_time": "0:26:23", "remaining_time": "0:25:44"}
212
+ {"current_steps": 2020, "total_steps": 3970, "loss": 0.4099, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.900589061674649e-06, "epoch": 5.08816120906801, "percentage": 50.88, "elapsed_time": "0:26:30", "remaining_time": "0:25:34"}
213
+ {"current_steps": 2030, "total_steps": 3970, "loss": 0.3904, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.86083349016073e-06, "epoch": 5.113350125944584, "percentage": 51.13, "elapsed_time": "0:26:36", "remaining_time": "0:25:25"}
214
+ {"current_steps": 2040, "total_steps": 3970, "loss": 0.4301, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.821086721794754e-06, "epoch": 5.138539042821159, "percentage": 51.39, "elapsed_time": "0:26:43", "remaining_time": "0:25:16"}
215
+ {"current_steps": 2050, "total_steps": 3970, "loss": 0.3907, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.781351270807208e-06, "epoch": 5.163727959697733, "percentage": 51.64, "elapsed_time": "0:26:50", "remaining_time": "0:25:08"}
216
+ {"current_steps": 2060, "total_steps": 3970, "loss": 0.3786, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.74162965071269e-06, "epoch": 5.188916876574307, "percentage": 51.89, "elapsed_time": "0:26:57", "remaining_time": "0:24:59"}
217
+ {"current_steps": 2070, "total_steps": 3970, "loss": 0.4028, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.701924374150901e-06, "epoch": 5.214105793450882, "percentage": 52.14, "elapsed_time": "0:27:03", "remaining_time": "0:24:50"}
218
+ {"current_steps": 2080, "total_steps": 3970, "loss": 0.39, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6622379527277195e-06, "epoch": 5.239294710327456, "percentage": 52.39, "elapsed_time": "0:27:10", "remaining_time": "0:24:41"}
219
+ {"current_steps": 2090, "total_steps": 3970, "loss": 0.4067, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.6225728968563126e-06, "epoch": 5.26448362720403, "percentage": 52.64, "elapsed_time": "0:27:16", "remaining_time": "0:24:31"}
220
+ {"current_steps": 2100, "total_steps": 3970, "loss": 0.4345, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.582931715598346e-06, "epoch": 5.289672544080605, "percentage": 52.9, "elapsed_time": "0:27:22", "remaining_time": "0:24:22"}
221
+ {"current_steps": 2110, "total_steps": 3970, "loss": 0.3994, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.543316916505269e-06, "epoch": 5.314861460957179, "percentage": 53.15, "elapsed_time": "0:27:28", "remaining_time": "0:24:13"}
222
+ {"current_steps": 2120, "total_steps": 3970, "loss": 0.4218, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.5037310054596936e-06, "epoch": 5.340050377833753, "percentage": 53.4, "elapsed_time": "0:27:34", "remaining_time": "0:24:04"}
223
+ {"current_steps": 2130, "total_steps": 3970, "loss": 0.3875, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.46417648651688e-06, "epoch": 5.365239294710327, "percentage": 53.65, "elapsed_time": "0:27:41", "remaining_time": "0:23:54"}
224
+ {"current_steps": 2140, "total_steps": 3970, "loss": 0.4069, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.4246558617463445e-06, "epoch": 5.390428211586902, "percentage": 53.9, "elapsed_time": "0:27:47", "remaining_time": "0:23:45"}
225
+ {"current_steps": 2150, "total_steps": 3970, "loss": 0.4043, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.38517163107359e-06, "epoch": 5.415617128463476, "percentage": 54.16, "elapsed_time": "0:27:53", "remaining_time": "0:23:36"}
226
+ {"current_steps": 2160, "total_steps": 3970, "loss": 0.3962, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.34572629212196e-06, "epoch": 5.44080604534005, "percentage": 54.41, "elapsed_time": "0:27:59", "remaining_time": "0:23:27"}
227
+ {"current_steps": 2170, "total_steps": 3970, "loss": 0.4041, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.30632234005466e-06, "epoch": 5.465994962216625, "percentage": 54.66, "elapsed_time": "0:28:05", "remaining_time": "0:23:18"}
228
+ {"current_steps": 2180, "total_steps": 3970, "loss": 0.4248, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.266962267416911e-06, "epoch": 5.491183879093199, "percentage": 54.91, "elapsed_time": "0:28:11", "remaining_time": "0:23:09"}
229
+ {"current_steps": 2190, "total_steps": 3970, "loss": 0.4181, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.227648563978294e-06, "epoch": 5.516372795969773, "percentage": 55.16, "elapsed_time": "0:28:17", "remaining_time": "0:22:59"}
230
+ {"current_steps": 2200, "total_steps": 3970, "loss": 0.4096, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.188383716575246e-06, "epoch": 5.541561712846348, "percentage": 55.42, "elapsed_time": "0:28:24", "remaining_time": "0:22:51"}
231
+ {"current_steps": 2200, "total_steps": 3970, "loss": null, "eval_loss": 0.4560880959033966, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 5.541561712846348, "percentage": 55.42, "elapsed_time": "0:28:24", "remaining_time": "0:22:51"}
232
+ {"current_steps": 2210, "total_steps": 3970, "loss": 0.4009, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.149170208953756e-06, "epoch": 5.566750629722922, "percentage": 55.67, "elapsed_time": "0:28:43", "remaining_time": "0:22:52"}
233
+ {"current_steps": 2220, "total_steps": 3970, "loss": 0.4022, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.11001052161225e-06, "epoch": 5.591939546599496, "percentage": 55.92, "elapsed_time": "0:28:50", "remaining_time": "0:22:43"}
234
+ {"current_steps": 2230, "total_steps": 3970, "loss": 0.426, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.07090713164469e-06, "epoch": 5.617128463476071, "percentage": 56.17, "elapsed_time": "0:28:56", "remaining_time": "0:22:34"}
235
+ {"current_steps": 2240, "total_steps": 3970, "loss": 0.4285, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 4.0318625125838774e-06, "epoch": 5.642317380352645, "percentage": 56.42, "elapsed_time": "0:29:02", "remaining_time": "0:22:25"}
236
+ {"current_steps": 2250, "total_steps": 3970, "loss": 0.3965, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.992879134244988e-06, "epoch": 5.667506297229219, "percentage": 56.68, "elapsed_time": "0:29:08", "remaining_time": "0:22:16"}
237
+ {"current_steps": 2260, "total_steps": 3970, "loss": 0.433, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.95395946256934e-06, "epoch": 5.692695214105793, "percentage": 56.93, "elapsed_time": "0:29:15", "remaining_time": "0:22:08"}
238
+ {"current_steps": 2270, "total_steps": 3970, "loss": 0.4119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.91510595946841e-06, "epoch": 5.717884130982368, "percentage": 57.18, "elapsed_time": "0:29:21", "remaining_time": "0:21:58"}
239
+ {"current_steps": 2280, "total_steps": 3970, "loss": 0.4023, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.876321082668098e-06, "epoch": 5.7430730478589425, "percentage": 57.43, "elapsed_time": "0:29:27", "remaining_time": "0:21:50"}
240
+ {"current_steps": 2290, "total_steps": 3970, "loss": 0.4085, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.837607285553269e-06, "epoch": 5.768261964735516, "percentage": 57.68, "elapsed_time": "0:29:33", "remaining_time": "0:21:41"}
241
+ {"current_steps": 2300, "total_steps": 3970, "loss": 0.4306, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7989670170125463e-06, "epoch": 5.793450881612091, "percentage": 57.93, "elapsed_time": "0:29:39", "remaining_time": "0:21:32"}
242
+ {"current_steps": 2310, "total_steps": 3970, "loss": 0.4222, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.7604027212834202e-06, "epoch": 5.818639798488665, "percentage": 58.19, "elapsed_time": "0:29:46", "remaining_time": "0:21:23"}
243
+ {"current_steps": 2320, "total_steps": 3970, "loss": 0.4235, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.721916837797627e-06, "epoch": 5.843828715365239, "percentage": 58.44, "elapsed_time": "0:29:52", "remaining_time": "0:21:14"}
244
+ {"current_steps": 2330, "total_steps": 3970, "loss": 0.412, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.6835118010268394e-06, "epoch": 5.869017632241814, "percentage": 58.69, "elapsed_time": "0:29:58", "remaining_time": "0:21:05"}
245
+ {"current_steps": 2340, "total_steps": 3970, "loss": 0.4299, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.64519004032867e-06, "epoch": 5.894206549118388, "percentage": 58.94, "elapsed_time": "0:30:04", "remaining_time": "0:20:56"}
246
+ {"current_steps": 2350, "total_steps": 3970, "loss": 0.4012, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.6069539797930075e-06, "epoch": 5.919395465994962, "percentage": 59.19, "elapsed_time": "0:30:10", "remaining_time": "0:20:48"}
247
+ {"current_steps": 2360, "total_steps": 3970, "loss": 0.4093, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5688060380886646e-06, "epoch": 5.944584382871536, "percentage": 59.45, "elapsed_time": "0:30:17", "remaining_time": "0:20:39"}
248
+ {"current_steps": 2370, "total_steps": 3970, "loss": 0.4238, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.5307486283103966e-06, "epoch": 5.969773299748111, "percentage": 59.7, "elapsed_time": "0:30:23", "remaining_time": "0:20:31"}
249
+ {"current_steps": 2380, "total_steps": 3970, "loss": 0.4038, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.4927841578262445e-06, "epoch": 5.994962216624685, "percentage": 59.95, "elapsed_time": "0:30:30", "remaining_time": "0:20:22"}
250
+ {"current_steps": 2390, "total_steps": 3970, "loss": 0.386, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.4549150281252635e-06, "epoch": 6.020151133501259, "percentage": 60.2, "elapsed_time": "0:30:37", "remaining_time": "0:20:14"}
251
+ {"current_steps": 2400, "total_steps": 3970, "loss": 0.4087, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.417143634665613e-06, "epoch": 6.045340050377834, "percentage": 60.45, "elapsed_time": "0:30:43", "remaining_time": "0:20:06"}
252
+ {"current_steps": 2400, "total_steps": 3970, "loss": null, "eval_loss": 0.4591861665248871, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 6.045340050377834, "percentage": 60.45, "elapsed_time": "0:30:43", "remaining_time": "0:20:06"}
253
+ {"current_steps": 2410, "total_steps": 3970, "loss": 0.3937, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.3794723667230213e-06, "epoch": 6.0705289672544085, "percentage": 60.71, "elapsed_time": "0:31:02", "remaining_time": "0:20:05"}
254
+ {"current_steps": 2420, "total_steps": 3970, "loss": 0.3928, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.3419036072396614e-06, "epoch": 6.095717884130982, "percentage": 60.96, "elapsed_time": "0:31:09", "remaining_time": "0:19:57"}
255
+ {"current_steps": 2430, "total_steps": 3970, "loss": 0.3725, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.304439732673402e-06, "epoch": 6.120906801007557, "percentage": 61.21, "elapsed_time": "0:31:15", "remaining_time": "0:19:48"}
256
+ {"current_steps": 2440, "total_steps": 3970, "loss": 0.3994, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2670831128474922e-06, "epoch": 6.146095717884131, "percentage": 61.46, "elapsed_time": "0:31:21", "remaining_time": "0:19:39"}
257
+ {"current_steps": 2450, "total_steps": 3970, "loss": 0.3785, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.2298361108006506e-06, "epoch": 6.171284634760705, "percentage": 61.71, "elapsed_time": "0:31:27", "remaining_time": "0:19:31"}
258
+ {"current_steps": 2460, "total_steps": 3970, "loss": 0.3805, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.192701082637585e-06, "epoch": 6.19647355163728, "percentage": 61.96, "elapsed_time": "0:31:33", "remaining_time": "0:19:22"}
259
+ {"current_steps": 2470, "total_steps": 3970, "loss": 0.4319, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.1556803773799616e-06, "epoch": 6.221662468513854, "percentage": 62.22, "elapsed_time": "0:31:39", "remaining_time": "0:19:13"}
260
+ {"current_steps": 2480, "total_steps": 3970, "loss": 0.3983, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.1187763368178127e-06, "epoch": 6.246851385390428, "percentage": 62.47, "elapsed_time": "0:31:45", "remaining_time": "0:19:05"}
261
+ {"current_steps": 2490, "total_steps": 3970, "loss": 0.4043, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0819912953613982e-06, "epoch": 6.272040302267002, "percentage": 62.72, "elapsed_time": "0:31:51", "remaining_time": "0:18:56"}
262
+ {"current_steps": 2500, "total_steps": 3970, "loss": 0.4194, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.04532757989355e-06, "epoch": 6.297229219143577, "percentage": 62.97, "elapsed_time": "0:31:58", "remaining_time": "0:18:48"}
263
+ {"current_steps": 2510, "total_steps": 3970, "loss": 0.4037, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 3.0087875096224674e-06, "epoch": 6.3224181360201515, "percentage": 63.22, "elapsed_time": "0:32:05", "remaining_time": "0:18:40"}
264
+ {"current_steps": 2520, "total_steps": 3970, "loss": 0.3713, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.972373395935031e-06, "epoch": 6.347607052896725, "percentage": 63.48, "elapsed_time": "0:32:12", "remaining_time": "0:18:31"}
265
+ {"current_steps": 2530, "total_steps": 3970, "loss": 0.3911, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.936087542250577e-06, "epoch": 6.3727959697733, "percentage": 63.73, "elapsed_time": "0:32:19", "remaining_time": "0:18:23"}
266
+ {"current_steps": 2540, "total_steps": 3970, "loss": 0.4214, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.8999322438751974e-06, "epoch": 6.3979848866498745, "percentage": 63.98, "elapsed_time": "0:32:25", "remaining_time": "0:18:15"}
267
+ {"current_steps": 2550, "total_steps": 3970, "loss": 0.3964, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.863909787856555e-06, "epoch": 6.423173803526448, "percentage": 64.23, "elapsed_time": "0:32:32", "remaining_time": "0:18:07"}
268
+ {"current_steps": 2560, "total_steps": 3970, "loss": 0.414, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.828022452839201e-06, "epoch": 6.448362720403023, "percentage": 64.48, "elapsed_time": "0:32:38", "remaining_time": "0:17:58"}
269
+ {"current_steps": 2570, "total_steps": 3970, "loss": 0.3914, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.792272508920443e-06, "epoch": 6.473551637279597, "percentage": 64.74, "elapsed_time": "0:32:44", "remaining_time": "0:17:50"}
270
+ {"current_steps": 2580, "total_steps": 3970, "loss": 0.3979, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.756662217506745e-06, "epoch": 6.498740554156171, "percentage": 64.99, "elapsed_time": "0:32:51", "remaining_time": "0:17:42"}
271
+ {"current_steps": 2590, "total_steps": 3970, "loss": 0.4186, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.7211938311706886e-06, "epoch": 6.523929471032746, "percentage": 65.24, "elapsed_time": "0:32:57", "remaining_time": "0:17:33"}
272
+ {"current_steps": 2600, "total_steps": 3970, "loss": 0.4119, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.6858695935084676e-06, "epoch": 6.54911838790932, "percentage": 65.49, "elapsed_time": "0:33:03", "remaining_time": "0:17:25"}
273
+ {"current_steps": 2600, "total_steps": 3970, "loss": null, "eval_loss": 0.4568886458873749, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 6.54911838790932, "percentage": 65.49, "elapsed_time": "0:33:03", "remaining_time": "0:17:25"}
274
+ {"current_steps": 2610, "total_steps": 3970, "loss": 0.3928, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.6506917389979782e-06, "epoch": 6.574307304785894, "percentage": 65.74, "elapsed_time": "0:33:24", "remaining_time": "0:17:24"}
275
+ {"current_steps": 2620, "total_steps": 3970, "loss": 0.4012, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.615662492857471e-06, "epoch": 6.599496221662468, "percentage": 65.99, "elapsed_time": "0:33:30", "remaining_time": "0:17:15"}
276
+ {"current_steps": 2630, "total_steps": 3970, "loss": 0.3903, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5807840709047916e-06, "epoch": 6.624685138539043, "percentage": 66.25, "elapsed_time": "0:33:36", "remaining_time": "0:17:07"}
277
+ {"current_steps": 2640, "total_steps": 3970, "loss": 0.3874, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.546058679417217e-06, "epoch": 6.6498740554156175, "percentage": 66.5, "elapsed_time": "0:33:42", "remaining_time": "0:16:59"}
278
+ {"current_steps": 2650, "total_steps": 3970, "loss": 0.4104, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.5114885149918943e-06, "epoch": 6.675062972292191, "percentage": 66.75, "elapsed_time": "0:33:49", "remaining_time": "0:16:50"}
279
+ {"current_steps": 2660, "total_steps": 3970, "loss": 0.4219, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.477075764406898e-06, "epoch": 6.700251889168766, "percentage": 67.0, "elapsed_time": "0:33:55", "remaining_time": "0:16:42"}
280
+ {"current_steps": 2670, "total_steps": 3970, "loss": 0.4036, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.4428226044828896e-06, "epoch": 6.72544080604534, "percentage": 67.25, "elapsed_time": "0:34:02", "remaining_time": "0:16:34"}
281
+ {"current_steps": 2680, "total_steps": 3970, "loss": 0.3726, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.4087312019454322e-06, "epoch": 6.750629722921914, "percentage": 67.51, "elapsed_time": "0:34:08", "remaining_time": "0:16:25"}
282
+ {"current_steps": 2690, "total_steps": 3970, "loss": 0.3707, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.3748037132879246e-06, "epoch": 6.775818639798489, "percentage": 67.76, "elapsed_time": "0:34:14", "remaining_time": "0:16:17"}
283
+ {"current_steps": 2700, "total_steps": 3970, "loss": 0.3926, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.341042284635201e-06, "epoch": 6.801007556675063, "percentage": 68.01, "elapsed_time": "0:34:19", "remaining_time": "0:16:08"}
284
+ {"current_steps": 2710, "total_steps": 3970, "loss": 0.4061, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.3074490516077536e-06, "epoch": 6.826196473551637, "percentage": 68.26, "elapsed_time": "0:34:26", "remaining_time": "0:16:00"}
285
+ {"current_steps": 2720, "total_steps": 3970, "loss": 0.3985, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.2740261391866634e-06, "epoch": 6.851385390428211, "percentage": 68.51, "elapsed_time": "0:34:34", "remaining_time": "0:15:53"}
286
+ {"current_steps": 2730, "total_steps": 3970, "loss": 0.3937, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.2407756615791693e-06, "epoch": 6.876574307304786, "percentage": 68.77, "elapsed_time": "0:34:41", "remaining_time": "0:15:45"}
287
+ {"current_steps": 2740, "total_steps": 3970, "loss": 0.3807, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.207699722084939e-06, "epoch": 6.9017632241813605, "percentage": 69.02, "elapsed_time": "0:34:47", "remaining_time": "0:15:37"}
288
+ {"current_steps": 2750, "total_steps": 3970, "loss": 0.4045, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.174800412963014e-06, "epoch": 6.926952141057934, "percentage": 69.27, "elapsed_time": "0:34:54", "remaining_time": "0:15:29"}
289
+ {"current_steps": 2760, "total_steps": 3970, "loss": 0.4135, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.1420798152994676e-06, "epoch": 6.952141057934509, "percentage": 69.52, "elapsed_time": "0:35:00", "remaining_time": "0:15:21"}
290
+ {"current_steps": 2770, "total_steps": 3970, "loss": 0.3883, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.1095399988757574e-06, "epoch": 6.977329974811083, "percentage": 69.77, "elapsed_time": "0:35:07", "remaining_time": "0:15:13"}
291
+ {"current_steps": 2780, "total_steps": 3970, "loss": 0.3811, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.0771830220378114e-06, "epoch": 7.002518891687657, "percentage": 70.03, "elapsed_time": "0:35:14", "remaining_time": "0:15:04"}
292
+ {"current_steps": 2790, "total_steps": 3970, "loss": 0.3993, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.045010931565804e-06, "epoch": 7.027707808564232, "percentage": 70.28, "elapsed_time": "0:35:20", "remaining_time": "0:14:56"}
293
+ {"current_steps": 2800, "total_steps": 3970, "loss": 0.4103, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": 2.0130257625447016e-06, "epoch": 7.052896725440806, "percentage": 70.53, "elapsed_time": "0:35:26", "remaining_time": "0:14:48"}
294
+ {"current_steps": 2800, "total_steps": 3970, "loss": null, "eval_loss": 0.46071410179138184, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 7.052896725440806, "percentage": 70.53, "elapsed_time": "0:35:26", "remaining_time": "0:14:48"}
295
+ {"current_steps": 2800, "total_steps": 3970, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 7.052896725440806, "percentage": 70.53, "elapsed_time": "0:35:26", "remaining_time": "0:14:48"}
296
+ {"current_steps": 71, "total_steps": 71, "loss": null, "eval_loss": 0.4556237757205963, "predict_loss": null, "reward": null, "accuracy": null, "learning_rate": null, "epoch": 7.052896725440806, "percentage": 100.0, "elapsed_time": "0:35:59", "remaining_time": "0:00:00"}
llama3_8b_peft/gsm8k/trainer_state.json ADDED
@@ -0,0 +1,2102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4556237757205963,
3
+ "best_model_checkpoint": "ckpt/llama3_8b_fuze27_no_sys/gsm8k_no_sys/checkpoint-2000",
4
+ "epoch": 7.052896725440806,
5
+ "eval_steps": 200,
6
+ "global_step": 2800,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02518891687657431,
13
+ "grad_norm": 0.8475966453552246,
14
+ "learning_rate": 5e-06,
15
+ "loss": 1.2636,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05037783375314862,
20
+ "grad_norm": 0.7087691426277161,
21
+ "learning_rate": 1e-05,
22
+ "loss": 1.2109,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.07556675062972293,
27
+ "grad_norm": 0.9212547540664673,
28
+ "learning_rate": 9.999841859439597e-06,
29
+ "loss": 1.2202,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.10075566750629723,
34
+ "grad_norm": 0.9778852462768555,
35
+ "learning_rate": 9.999367447761763e-06,
36
+ "loss": 1.1029,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.12594458438287154,
41
+ "grad_norm": 0.8553369641304016,
42
+ "learning_rate": 9.998576794975987e-06,
43
+ "loss": 0.9679,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.15113350125944586,
48
+ "grad_norm": 1.016830563545227,
49
+ "learning_rate": 9.997469951095982e-06,
50
+ "loss": 0.8076,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.17632241813602015,
55
+ "grad_norm": 0.7716158628463745,
56
+ "learning_rate": 9.99604698613651e-06,
57
+ "loss": 0.7149,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.20151133501259447,
62
+ "grad_norm": 0.6571676731109619,
63
+ "learning_rate": 9.994307990108963e-06,
64
+ "loss": 0.6482,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.22670025188916876,
69
+ "grad_norm": 0.7680717706680298,
70
+ "learning_rate": 9.992253073015664e-06,
71
+ "loss": 0.629,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.2518891687657431,
76
+ "grad_norm": 0.7847670912742615,
77
+ "learning_rate": 9.989882364842906e-06,
78
+ "loss": 0.589,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.2770780856423174,
83
+ "grad_norm": 0.700131893157959,
84
+ "learning_rate": 9.987196015552742e-06,
85
+ "loss": 0.6087,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.3022670025188917,
90
+ "grad_norm": 0.7196058630943298,
91
+ "learning_rate": 9.98419419507348e-06,
92
+ "loss": 0.5849,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.327455919395466,
97
+ "grad_norm": 0.5935602784156799,
98
+ "learning_rate": 9.980877093288953e-06,
99
+ "loss": 0.5495,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.3526448362720403,
104
+ "grad_norm": 0.7468442320823669,
105
+ "learning_rate": 9.97724492002649e-06,
106
+ "loss": 0.5916,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.3778337531486146,
111
+ "grad_norm": 0.7292435169219971,
112
+ "learning_rate": 9.973297905043662e-06,
113
+ "loss": 0.624,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.40302267002518893,
118
+ "grad_norm": 0.7489534616470337,
119
+ "learning_rate": 9.969036298013732e-06,
120
+ "loss": 0.5867,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.4282115869017632,
125
+ "grad_norm": 0.8862701058387756,
126
+ "learning_rate": 9.964460368509868e-06,
127
+ "loss": 0.5561,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.4534005037783375,
132
+ "grad_norm": 0.7557389736175537,
133
+ "learning_rate": 9.959570405988096e-06,
134
+ "loss": 0.577,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.47858942065491183,
139
+ "grad_norm": 0.7594360709190369,
140
+ "learning_rate": 9.954366719768975e-06,
141
+ "loss": 0.5833,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.5037783375314862,
146
+ "grad_norm": 0.7483209371566772,
147
+ "learning_rate": 9.948849639018055e-06,
148
+ "loss": 0.5398,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.5037783375314862,
153
+ "eval_loss": 0.5354337692260742,
154
+ "eval_runtime": 13.2871,
155
+ "eval_samples_per_second": 84.368,
156
+ "eval_steps_per_second": 5.344,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.5289672544080605,
161
+ "grad_norm": 0.7889060378074646,
162
+ "learning_rate": 9.943019512725026e-06,
163
+ "loss": 0.5574,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 0.5541561712846348,
168
+ "grad_norm": 0.7938101887702942,
169
+ "learning_rate": 9.936876709681668e-06,
170
+ "loss": 0.5173,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 0.5793450881612091,
175
+ "grad_norm": 1.0842037200927734,
176
+ "learning_rate": 9.930421618458506e-06,
177
+ "loss": 0.5581,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.6045340050377834,
182
+ "grad_norm": 0.7992810010910034,
183
+ "learning_rate": 9.923654647380236e-06,
184
+ "loss": 0.5654,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 0.6297229219143576,
189
+ "grad_norm": 0.7254189252853394,
190
+ "learning_rate": 9.916576224499898e-06,
191
+ "loss": 0.5542,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 0.654911838790932,
196
+ "grad_norm": 0.7451283931732178,
197
+ "learning_rate": 9.9091867975718e-06,
198
+ "loss": 0.5195,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 0.6801007556675063,
203
+ "grad_norm": 0.72111576795578,
204
+ "learning_rate": 9.901486834023182e-06,
205
+ "loss": 0.5479,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 0.7052896725440806,
210
+ "grad_norm": 0.7377191781997681,
211
+ "learning_rate": 9.893476820924668e-06,
212
+ "loss": 0.5274,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 0.7304785894206549,
217
+ "grad_norm": 0.82171630859375,
218
+ "learning_rate": 9.885157264959442e-06,
219
+ "loss": 0.5228,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 0.7556675062972292,
224
+ "grad_norm": 0.7577778100967407,
225
+ "learning_rate": 9.8765286923912e-06,
226
+ "loss": 0.5234,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.7808564231738035,
231
+ "grad_norm": 0.8242009878158569,
232
+ "learning_rate": 9.867591649030863e-06,
233
+ "loss": 0.5475,
234
+ "step": 310
235
+ },
236
+ {
237
+ "epoch": 0.8060453400503779,
238
+ "grad_norm": 0.8752724528312683,
239
+ "learning_rate": 9.85834670020205e-06,
240
+ "loss": 0.5108,
241
+ "step": 320
242
+ },
243
+ {
244
+ "epoch": 0.8312342569269522,
245
+ "grad_norm": 0.8230427503585815,
246
+ "learning_rate": 9.848794430705317e-06,
247
+ "loss": 0.5085,
248
+ "step": 330
249
+ },
250
+ {
251
+ "epoch": 0.8564231738035264,
252
+ "grad_norm": 0.8293437361717224,
253
+ "learning_rate": 9.838935444781162e-06,
254
+ "loss": 0.5449,
255
+ "step": 340
256
+ },
257
+ {
258
+ "epoch": 0.8816120906801007,
259
+ "grad_norm": 0.8840632438659668,
260
+ "learning_rate": 9.82877036607181e-06,
261
+ "loss": 0.5203,
262
+ "step": 350
263
+ },
264
+ {
265
+ "epoch": 0.906801007556675,
266
+ "grad_norm": 0.8337811827659607,
267
+ "learning_rate": 9.818299837581758e-06,
268
+ "loss": 0.5225,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 0.9319899244332494,
273
+ "grad_norm": 0.8606433272361755,
274
+ "learning_rate": 9.807524521637103e-06,
275
+ "loss": 0.5323,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 0.9571788413098237,
280
+ "grad_norm": 0.8039270639419556,
281
+ "learning_rate": 9.796445099843648e-06,
282
+ "loss": 0.5494,
283
+ "step": 380
284
+ },
285
+ {
286
+ "epoch": 0.982367758186398,
287
+ "grad_norm": 0.922153651714325,
288
+ "learning_rate": 9.785062273043778e-06,
289
+ "loss": 0.5503,
290
+ "step": 390
291
+ },
292
+ {
293
+ "epoch": 1.0075566750629723,
294
+ "grad_norm": 0.8404749631881714,
295
+ "learning_rate": 9.77337676127214e-06,
296
+ "loss": 0.5337,
297
+ "step": 400
298
+ },
299
+ {
300
+ "epoch": 1.0075566750629723,
301
+ "eval_loss": 0.5050087571144104,
302
+ "eval_runtime": 13.3079,
303
+ "eval_samples_per_second": 84.236,
304
+ "eval_steps_per_second": 5.335,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 1.0327455919395465,
309
+ "grad_norm": 0.8640243411064148,
310
+ "learning_rate": 9.761389303710085e-06,
311
+ "loss": 0.5138,
312
+ "step": 410
313
+ },
314
+ {
315
+ "epoch": 1.057934508816121,
316
+ "grad_norm": 0.8795576095581055,
317
+ "learning_rate": 9.749100658638914e-06,
318
+ "loss": 0.5152,
319
+ "step": 420
320
+ },
321
+ {
322
+ "epoch": 1.0831234256926952,
323
+ "grad_norm": 0.8557228446006775,
324
+ "learning_rate": 9.736511603391917e-06,
325
+ "loss": 0.5208,
326
+ "step": 430
327
+ },
328
+ {
329
+ "epoch": 1.1083123425692696,
330
+ "grad_norm": 0.8721880912780762,
331
+ "learning_rate": 9.723622934305193e-06,
332
+ "loss": 0.5271,
333
+ "step": 440
334
+ },
335
+ {
336
+ "epoch": 1.1335012594458438,
337
+ "grad_norm": 0.7951139807701111,
338
+ "learning_rate": 9.710435466667281e-06,
339
+ "loss": 0.4956,
340
+ "step": 450
341
+ },
342
+ {
343
+ "epoch": 1.1586901763224182,
344
+ "grad_norm": 0.7813353538513184,
345
+ "learning_rate": 9.696950034667595e-06,
346
+ "loss": 0.514,
347
+ "step": 460
348
+ },
349
+ {
350
+ "epoch": 1.1838790931989924,
351
+ "grad_norm": 0.8839511275291443,
352
+ "learning_rate": 9.68316749134364e-06,
353
+ "loss": 0.4965,
354
+ "step": 470
355
+ },
356
+ {
357
+ "epoch": 1.2090680100755669,
358
+ "grad_norm": 0.9440765380859375,
359
+ "learning_rate": 9.669088708527068e-06,
360
+ "loss": 0.5264,
361
+ "step": 480
362
+ },
363
+ {
364
+ "epoch": 1.234256926952141,
365
+ "grad_norm": 0.9030229449272156,
366
+ "learning_rate": 9.654714576788521e-06,
367
+ "loss": 0.478,
368
+ "step": 490
369
+ },
370
+ {
371
+ "epoch": 1.2594458438287153,
372
+ "grad_norm": 0.9342619776725769,
373
+ "learning_rate": 9.640046005381299e-06,
374
+ "loss": 0.5274,
375
+ "step": 500
376
+ },
377
+ {
378
+ "epoch": 1.2846347607052897,
379
+ "grad_norm": 0.9194152355194092,
380
+ "learning_rate": 9.62508392218384e-06,
381
+ "loss": 0.4912,
382
+ "step": 510
383
+ },
384
+ {
385
+ "epoch": 1.309823677581864,
386
+ "grad_norm": 1.1548001766204834,
387
+ "learning_rate": 9.609829273641034e-06,
388
+ "loss": 0.4978,
389
+ "step": 520
390
+ },
391
+ {
392
+ "epoch": 1.3350125944584383,
393
+ "grad_norm": 0.8560693264007568,
394
+ "learning_rate": 9.59428302470435e-06,
395
+ "loss": 0.5496,
396
+ "step": 530
397
+ },
398
+ {
399
+ "epoch": 1.3602015113350125,
400
+ "grad_norm": 1.0015591382980347,
401
+ "learning_rate": 9.578446158770795e-06,
402
+ "loss": 0.4816,
403
+ "step": 540
404
+ },
405
+ {
406
+ "epoch": 1.385390428211587,
407
+ "grad_norm": 1.0857878923416138,
408
+ "learning_rate": 9.56231967762071e-06,
409
+ "loss": 0.5116,
410
+ "step": 550
411
+ },
412
+ {
413
+ "epoch": 1.4105793450881612,
414
+ "grad_norm": 1.1834816932678223,
415
+ "learning_rate": 9.545904601354402e-06,
416
+ "loss": 0.5327,
417
+ "step": 560
418
+ },
419
+ {
420
+ "epoch": 1.4357682619647356,
421
+ "grad_norm": 0.902275025844574,
422
+ "learning_rate": 9.529201968327618e-06,
423
+ "loss": 0.4983,
424
+ "step": 570
425
+ },
426
+ {
427
+ "epoch": 1.4609571788413098,
428
+ "grad_norm": 0.8934423327445984,
429
+ "learning_rate": 9.51221283508585e-06,
430
+ "loss": 0.546,
431
+ "step": 580
432
+ },
433
+ {
434
+ "epoch": 1.486146095717884,
435
+ "grad_norm": 1.0359569787979126,
436
+ "learning_rate": 9.494938276297523e-06,
437
+ "loss": 0.5256,
438
+ "step": 590
439
+ },
440
+ {
441
+ "epoch": 1.5113350125944585,
442
+ "grad_norm": 1.0701916217803955,
443
+ "learning_rate": 9.477379384686e-06,
444
+ "loss": 0.5133,
445
+ "step": 600
446
+ },
447
+ {
448
+ "epoch": 1.5113350125944585,
449
+ "eval_loss": 0.4898512363433838,
450
+ "eval_runtime": 23.876,
451
+ "eval_samples_per_second": 46.951,
452
+ "eval_steps_per_second": 2.974,
453
+ "step": 600
454
+ },
455
+ {
456
+ "epoch": 1.536523929471033,
457
+ "grad_norm": 1.2980886697769165,
458
+ "learning_rate": 9.459537270960464e-06,
459
+ "loss": 0.4974,
460
+ "step": 610
461
+ },
462
+ {
463
+ "epoch": 1.561712846347607,
464
+ "grad_norm": 1.06404447555542,
465
+ "learning_rate": 9.44141306374566e-06,
466
+ "loss": 0.5036,
467
+ "step": 620
468
+ },
469
+ {
470
+ "epoch": 1.5869017632241813,
471
+ "grad_norm": 0.922306478023529,
472
+ "learning_rate": 9.423007909510504e-06,
473
+ "loss": 0.5056,
474
+ "step": 630
475
+ },
476
+ {
477
+ "epoch": 1.6120906801007555,
478
+ "grad_norm": 0.9224857091903687,
479
+ "learning_rate": 9.404322972495555e-06,
480
+ "loss": 0.5134,
481
+ "step": 640
482
+ },
483
+ {
484
+ "epoch": 1.63727959697733,
485
+ "grad_norm": 0.825790286064148,
486
+ "learning_rate": 9.385359434639381e-06,
487
+ "loss": 0.4734,
488
+ "step": 650
489
+ },
490
+ {
491
+ "epoch": 1.6624685138539044,
492
+ "grad_norm": 1.1901260614395142,
493
+ "learning_rate": 9.36611849550378e-06,
494
+ "loss": 0.4851,
495
+ "step": 660
496
+ },
497
+ {
498
+ "epoch": 1.6876574307304786,
499
+ "grad_norm": 1.1009478569030762,
500
+ "learning_rate": 9.346601372197914e-06,
501
+ "loss": 0.4863,
502
+ "step": 670
503
+ },
504
+ {
505
+ "epoch": 1.7128463476070528,
506
+ "grad_norm": 0.9417995810508728,
507
+ "learning_rate": 9.326809299301308e-06,
508
+ "loss": 0.4972,
509
+ "step": 680
510
+ },
511
+ {
512
+ "epoch": 1.7380352644836272,
513
+ "grad_norm": 1.0709220170974731,
514
+ "learning_rate": 9.306743528785762e-06,
515
+ "loss": 0.4876,
516
+ "step": 690
517
+ },
518
+ {
519
+ "epoch": 1.7632241813602016,
520
+ "grad_norm": 0.9696930646896362,
521
+ "learning_rate": 9.286405329936153e-06,
522
+ "loss": 0.5068,
523
+ "step": 700
524
+ },
525
+ {
526
+ "epoch": 1.7884130982367759,
527
+ "grad_norm": 1.154576301574707,
528
+ "learning_rate": 9.265795989270148e-06,
529
+ "loss": 0.4894,
530
+ "step": 710
531
+ },
532
+ {
533
+ "epoch": 1.81360201511335,
534
+ "grad_norm": 1.1200720071792603,
535
+ "learning_rate": 9.244916810456822e-06,
536
+ "loss": 0.4776,
537
+ "step": 720
538
+ },
539
+ {
540
+ "epoch": 1.8387909319899243,
541
+ "grad_norm": 1.1224340200424194,
542
+ "learning_rate": 9.223769114234185e-06,
543
+ "loss": 0.4689,
544
+ "step": 730
545
+ },
546
+ {
547
+ "epoch": 1.8639798488664987,
548
+ "grad_norm": 1.3083769083023071,
549
+ "learning_rate": 9.202354238325652e-06,
550
+ "loss": 0.4869,
551
+ "step": 740
552
+ },
553
+ {
554
+ "epoch": 1.8891687657430731,
555
+ "grad_norm": 1.0586867332458496,
556
+ "learning_rate": 9.180673537355414e-06,
557
+ "loss": 0.4767,
558
+ "step": 750
559
+ },
560
+ {
561
+ "epoch": 1.9143576826196473,
562
+ "grad_norm": 1.1514278650283813,
563
+ "learning_rate": 9.158728382762753e-06,
564
+ "loss": 0.5064,
565
+ "step": 760
566
+ },
567
+ {
568
+ "epoch": 1.9395465994962215,
569
+ "grad_norm": 1.187637209892273,
570
+ "learning_rate": 9.136520162715288e-06,
571
+ "loss": 0.5379,
572
+ "step": 770
573
+ },
574
+ {
575
+ "epoch": 1.964735516372796,
576
+ "grad_norm": 0.9145538210868835,
577
+ "learning_rate": 9.11405028202116e-06,
578
+ "loss": 0.4965,
579
+ "step": 780
580
+ },
581
+ {
582
+ "epoch": 1.9899244332493704,
583
+ "grad_norm": 1.2994606494903564,
584
+ "learning_rate": 9.091320162040183e-06,
585
+ "loss": 0.4806,
586
+ "step": 790
587
+ },
588
+ {
589
+ "epoch": 2.0151133501259446,
590
+ "grad_norm": 1.186442494392395,
591
+ "learning_rate": 9.068331240593919e-06,
592
+ "loss": 0.4912,
593
+ "step": 800
594
+ },
595
+ {
596
+ "epoch": 2.0151133501259446,
597
+ "eval_loss": 0.47740957140922546,
598
+ "eval_runtime": 26.8697,
599
+ "eval_samples_per_second": 41.72,
600
+ "eval_steps_per_second": 2.642,
601
+ "step": 800
602
+ },
603
+ {
604
+ "epoch": 2.040302267002519,
605
+ "grad_norm": 1.0701855421066284,
606
+ "learning_rate": 9.045084971874738e-06,
607
+ "loss": 0.5086,
608
+ "step": 810
609
+ },
610
+ {
611
+ "epoch": 2.065491183879093,
612
+ "grad_norm": 1.139443039894104,
613
+ "learning_rate": 9.021582826353825e-06,
614
+ "loss": 0.4363,
615
+ "step": 820
616
+ },
617
+ {
618
+ "epoch": 2.0906801007556677,
619
+ "grad_norm": 1.1993327140808105,
620
+ "learning_rate": 8.997826290688165e-06,
621
+ "loss": 0.486,
622
+ "step": 830
623
+ },
624
+ {
625
+ "epoch": 2.115869017632242,
626
+ "grad_norm": 1.0501916408538818,
627
+ "learning_rate": 8.973816867626503e-06,
628
+ "loss": 0.5239,
629
+ "step": 840
630
+ },
631
+ {
632
+ "epoch": 2.141057934508816,
633
+ "grad_norm": 1.1302216053009033,
634
+ "learning_rate": 8.949556075914286e-06,
635
+ "loss": 0.4903,
636
+ "step": 850
637
+ },
638
+ {
639
+ "epoch": 2.1662468513853903,
640
+ "grad_norm": 1.2666516304016113,
641
+ "learning_rate": 8.925045450197593e-06,
642
+ "loss": 0.4497,
643
+ "step": 860
644
+ },
645
+ {
646
+ "epoch": 2.1914357682619645,
647
+ "grad_norm": 1.0452088117599487,
648
+ "learning_rate": 8.900286540926062e-06,
649
+ "loss": 0.4906,
650
+ "step": 870
651
+ },
652
+ {
653
+ "epoch": 2.216624685138539,
654
+ "grad_norm": 1.104224681854248,
655
+ "learning_rate": 8.875280914254803e-06,
656
+ "loss": 0.5016,
657
+ "step": 880
658
+ },
659
+ {
660
+ "epoch": 2.2418136020151134,
661
+ "grad_norm": 1.1546283960342407,
662
+ "learning_rate": 8.850030151945343e-06,
663
+ "loss": 0.477,
664
+ "step": 890
665
+ },
666
+ {
667
+ "epoch": 2.2670025188916876,
668
+ "grad_norm": 1.3012408018112183,
669
+ "learning_rate": 8.824535851265565e-06,
670
+ "loss": 0.4967,
671
+ "step": 900
672
+ },
673
+ {
674
+ "epoch": 2.292191435768262,
675
+ "grad_norm": 1.1640158891677856,
676
+ "learning_rate": 8.798799624888665e-06,
677
+ "loss": 0.4833,
678
+ "step": 910
679
+ },
680
+ {
681
+ "epoch": 2.3173803526448364,
682
+ "grad_norm": 1.3770086765289307,
683
+ "learning_rate": 8.772823100791152e-06,
684
+ "loss": 0.4745,
685
+ "step": 920
686
+ },
687
+ {
688
+ "epoch": 2.3425692695214106,
689
+ "grad_norm": 1.1464217901229858,
690
+ "learning_rate": 8.746607922149853e-06,
691
+ "loss": 0.4719,
692
+ "step": 930
693
+ },
694
+ {
695
+ "epoch": 2.367758186397985,
696
+ "grad_norm": 1.2240654230117798,
697
+ "learning_rate": 8.720155747237985e-06,
698
+ "loss": 0.4749,
699
+ "step": 940
700
+ },
701
+ {
702
+ "epoch": 2.392947103274559,
703
+ "grad_norm": 1.0342758893966675,
704
+ "learning_rate": 8.693468249320257e-06,
705
+ "loss": 0.457,
706
+ "step": 950
707
+ },
708
+ {
709
+ "epoch": 2.4181360201511337,
710
+ "grad_norm": 1.029079794883728,
711
+ "learning_rate": 8.666547116547015e-06,
712
+ "loss": 0.4793,
713
+ "step": 960
714
+ },
715
+ {
716
+ "epoch": 2.443324937027708,
717
+ "grad_norm": 1.2524383068084717,
718
+ "learning_rate": 8.639394051847472e-06,
719
+ "loss": 0.4589,
720
+ "step": 970
721
+ },
722
+ {
723
+ "epoch": 2.468513853904282,
724
+ "grad_norm": 1.2764012813568115,
725
+ "learning_rate": 8.612010772821972e-06,
726
+ "loss": 0.4882,
727
+ "step": 980
728
+ },
729
+ {
730
+ "epoch": 2.4937027707808563,
731
+ "grad_norm": 1.1041889190673828,
732
+ "learning_rate": 8.584399011633356e-06,
733
+ "loss": 0.4775,
734
+ "step": 990
735
+ },
736
+ {
737
+ "epoch": 2.5188916876574305,
738
+ "grad_norm": 1.5502779483795166,
739
+ "learning_rate": 8.556560514897376e-06,
740
+ "loss": 0.4573,
741
+ "step": 1000
742
+ },
743
+ {
744
+ "epoch": 2.5188916876574305,
745
+ "eval_loss": 0.470566064119339,
746
+ "eval_runtime": 13.291,
747
+ "eval_samples_per_second": 84.343,
748
+ "eval_steps_per_second": 5.342,
749
+ "step": 1000
750
+ },
751
+ {
752
+ "epoch": 2.544080604534005,
753
+ "grad_norm": 1.226182460784912,
754
+ "learning_rate": 8.528497043572222e-06,
755
+ "loss": 0.48,
756
+ "step": 1010
757
+ },
758
+ {
759
+ "epoch": 2.5692695214105794,
760
+ "grad_norm": 1.604488492012024,
761
+ "learning_rate": 8.500210372847128e-06,
762
+ "loss": 0.4544,
763
+ "step": 1020
764
+ },
765
+ {
766
+ "epoch": 2.5944584382871536,
767
+ "grad_norm": 1.2570867538452148,
768
+ "learning_rate": 8.471702292030078e-06,
769
+ "loss": 0.4977,
770
+ "step": 1030
771
+ },
772
+ {
773
+ "epoch": 2.619647355163728,
774
+ "grad_norm": 1.193834662437439,
775
+ "learning_rate": 8.44297460443462e-06,
776
+ "loss": 0.4467,
777
+ "step": 1040
778
+ },
779
+ {
780
+ "epoch": 2.644836272040302,
781
+ "grad_norm": 1.1679896116256714,
782
+ "learning_rate": 8.414029127265803e-06,
783
+ "loss": 0.4527,
784
+ "step": 1050
785
+ },
786
+ {
787
+ "epoch": 2.6700251889168767,
788
+ "grad_norm": 1.1200467348098755,
789
+ "learning_rate": 8.38486769150522e-06,
790
+ "loss": 0.468,
791
+ "step": 1060
792
+ },
793
+ {
794
+ "epoch": 2.695214105793451,
795
+ "grad_norm": 1.2098541259765625,
796
+ "learning_rate": 8.355492141795185e-06,
797
+ "loss": 0.4817,
798
+ "step": 1070
799
+ },
800
+ {
801
+ "epoch": 2.720403022670025,
802
+ "grad_norm": 1.5903912782669067,
803
+ "learning_rate": 8.325904336322057e-06,
804
+ "loss": 0.4718,
805
+ "step": 1080
806
+ },
807
+ {
808
+ "epoch": 2.7455919395465997,
809
+ "grad_norm": 1.2352293729782104,
810
+ "learning_rate": 8.296106146698693e-06,
811
+ "loss": 0.472,
812
+ "step": 1090
813
+ },
814
+ {
815
+ "epoch": 2.770780856423174,
816
+ "grad_norm": 1.0329700708389282,
817
+ "learning_rate": 8.266099457846052e-06,
818
+ "loss": 0.4461,
819
+ "step": 1100
820
+ },
821
+ {
822
+ "epoch": 2.795969773299748,
823
+ "grad_norm": 1.3912144899368286,
824
+ "learning_rate": 8.235886167873975e-06,
825
+ "loss": 0.4528,
826
+ "step": 1110
827
+ },
828
+ {
829
+ "epoch": 2.8211586901763224,
830
+ "grad_norm": 1.325369119644165,
831
+ "learning_rate": 8.2054681879611e-06,
832
+ "loss": 0.4638,
833
+ "step": 1120
834
+ },
835
+ {
836
+ "epoch": 2.8463476070528966,
837
+ "grad_norm": 1.3374369144439697,
838
+ "learning_rate": 8.174847442233988e-06,
839
+ "loss": 0.4538,
840
+ "step": 1130
841
+ },
842
+ {
843
+ "epoch": 2.8715365239294712,
844
+ "grad_norm": 1.2668341398239136,
845
+ "learning_rate": 8.144025867645391e-06,
846
+ "loss": 0.4563,
847
+ "step": 1140
848
+ },
849
+ {
850
+ "epoch": 2.8967254408060454,
851
+ "grad_norm": 1.0515339374542236,
852
+ "learning_rate": 8.113005413851742e-06,
853
+ "loss": 0.4484,
854
+ "step": 1150
855
+ },
856
+ {
857
+ "epoch": 2.9219143576826196,
858
+ "grad_norm": 1.4422743320465088,
859
+ "learning_rate": 8.081788043089818e-06,
860
+ "loss": 0.4687,
861
+ "step": 1160
862
+ },
863
+ {
864
+ "epoch": 2.947103274559194,
865
+ "grad_norm": 1.141394853591919,
866
+ "learning_rate": 8.050375730052622e-06,
867
+ "loss": 0.4463,
868
+ "step": 1170
869
+ },
870
+ {
871
+ "epoch": 2.972292191435768,
872
+ "grad_norm": 1.4133881330490112,
873
+ "learning_rate": 8.018770461764471e-06,
874
+ "loss": 0.4676,
875
+ "step": 1180
876
+ },
877
+ {
878
+ "epoch": 2.9974811083123427,
879
+ "grad_norm": 1.1865912675857544,
880
+ "learning_rate": 7.986974237455298e-06,
881
+ "loss": 0.4539,
882
+ "step": 1190
883
+ },
884
+ {
885
+ "epoch": 3.022670025188917,
886
+ "grad_norm": 1.2832095623016357,
887
+ "learning_rate": 7.954989068434198e-06,
888
+ "loss": 0.4628,
889
+ "step": 1200
890
+ },
891
+ {
892
+ "epoch": 3.022670025188917,
893
+ "eval_loss": 0.46436572074890137,
894
+ "eval_runtime": 14.2922,
895
+ "eval_samples_per_second": 78.434,
896
+ "eval_steps_per_second": 4.968,
897
+ "step": 1200
898
+ },
899
+ {
900
+ "epoch": 3.047858942065491,
901
+ "grad_norm": 1.3625766038894653,
902
+ "learning_rate": 7.922816977962191e-06,
903
+ "loss": 0.4232,
904
+ "step": 1210
905
+ },
906
+ {
907
+ "epoch": 3.0730478589420653,
908
+ "grad_norm": 1.343997836112976,
909
+ "learning_rate": 7.890460001124242e-06,
910
+ "loss": 0.4422,
911
+ "step": 1220
912
+ },
913
+ {
914
+ "epoch": 3.09823677581864,
915
+ "grad_norm": 1.5123403072357178,
916
+ "learning_rate": 7.857920184700534e-06,
917
+ "loss": 0.4483,
918
+ "step": 1230
919
+ },
920
+ {
921
+ "epoch": 3.123425692695214,
922
+ "grad_norm": 2.1442222595214844,
923
+ "learning_rate": 7.825199587036989e-06,
924
+ "loss": 0.4638,
925
+ "step": 1240
926
+ },
927
+ {
928
+ "epoch": 3.1486146095717884,
929
+ "grad_norm": 1.526623010635376,
930
+ "learning_rate": 7.792300277915062e-06,
931
+ "loss": 0.4417,
932
+ "step": 1250
933
+ },
934
+ {
935
+ "epoch": 3.1738035264483626,
936
+ "grad_norm": 1.683313012123108,
937
+ "learning_rate": 7.759224338420832e-06,
938
+ "loss": 0.4677,
939
+ "step": 1260
940
+ },
941
+ {
942
+ "epoch": 3.1989924433249373,
943
+ "grad_norm": 1.5938961505889893,
944
+ "learning_rate": 7.725973860813338e-06,
945
+ "loss": 0.4304,
946
+ "step": 1270
947
+ },
948
+ {
949
+ "epoch": 3.2241813602015115,
950
+ "grad_norm": 1.438961148262024,
951
+ "learning_rate": 7.692550948392248e-06,
952
+ "loss": 0.4818,
953
+ "step": 1280
954
+ },
955
+ {
956
+ "epoch": 3.2493702770780857,
957
+ "grad_norm": 1.3866137266159058,
958
+ "learning_rate": 7.658957715364801e-06,
959
+ "loss": 0.4483,
960
+ "step": 1290
961
+ },
962
+ {
963
+ "epoch": 3.27455919395466,
964
+ "grad_norm": 1.456385612487793,
965
+ "learning_rate": 7.625196286712076e-06,
966
+ "loss": 0.4417,
967
+ "step": 1300
968
+ },
969
+ {
970
+ "epoch": 3.299748110831234,
971
+ "grad_norm": 1.5823540687561035,
972
+ "learning_rate": 7.591268798054569e-06,
973
+ "loss": 0.4722,
974
+ "step": 1310
975
+ },
976
+ {
977
+ "epoch": 3.3249370277078087,
978
+ "grad_norm": 1.2863452434539795,
979
+ "learning_rate": 7.5571773955171124e-06,
980
+ "loss": 0.4739,
981
+ "step": 1320
982
+ },
983
+ {
984
+ "epoch": 3.350125944584383,
985
+ "grad_norm": 1.3515551090240479,
986
+ "learning_rate": 7.522924235593103e-06,
987
+ "loss": 0.4062,
988
+ "step": 1330
989
+ },
990
+ {
991
+ "epoch": 3.375314861460957,
992
+ "grad_norm": 1.2392518520355225,
993
+ "learning_rate": 7.488511485008106e-06,
994
+ "loss": 0.4644,
995
+ "step": 1340
996
+ },
997
+ {
998
+ "epoch": 3.4005037783375314,
999
+ "grad_norm": 1.5015583038330078,
1000
+ "learning_rate": 7.453941320582785e-06,
1001
+ "loss": 0.451,
1002
+ "step": 1350
1003
+ },
1004
+ {
1005
+ "epoch": 3.4256926952141056,
1006
+ "grad_norm": 1.409325122833252,
1007
+ "learning_rate": 7.419215929095211e-06,
1008
+ "loss": 0.4254,
1009
+ "step": 1360
1010
+ },
1011
+ {
1012
+ "epoch": 3.4508816120906802,
1013
+ "grad_norm": 1.6640633344650269,
1014
+ "learning_rate": 7.3843375071425315e-06,
1015
+ "loss": 0.4723,
1016
+ "step": 1370
1017
+ },
1018
+ {
1019
+ "epoch": 3.4760705289672544,
1020
+ "grad_norm": 1.5667325258255005,
1021
+ "learning_rate": 7.349308261002023e-06,
1022
+ "loss": 0.4442,
1023
+ "step": 1380
1024
+ },
1025
+ {
1026
+ "epoch": 3.5012594458438286,
1027
+ "grad_norm": 1.4159743785858154,
1028
+ "learning_rate": 7.314130406491533e-06,
1029
+ "loss": 0.441,
1030
+ "step": 1390
1031
+ },
1032
+ {
1033
+ "epoch": 3.5264483627204033,
1034
+ "grad_norm": 1.4985648393630981,
1035
+ "learning_rate": 7.278806168829313e-06,
1036
+ "loss": 0.4429,
1037
+ "step": 1400
1038
+ },
1039
+ {
1040
+ "epoch": 3.5264483627204033,
1041
+ "eval_loss": 0.45942774415016174,
1042
+ "eval_runtime": 13.8313,
1043
+ "eval_samples_per_second": 81.048,
1044
+ "eval_steps_per_second": 5.133,
1045
+ "step": 1400
1046
+ },
1047
+ {
1048
+ "epoch": 3.551637279596977,
1049
+ "grad_norm": 1.6101562976837158,
1050
+ "learning_rate": 7.243337782493255e-06,
1051
+ "loss": 0.4372,
1052
+ "step": 1410
1053
+ },
1054
+ {
1055
+ "epoch": 3.5768261964735517,
1056
+ "grad_norm": 1.3172640800476074,
1057
+ "learning_rate": 7.2077274910795605e-06,
1058
+ "loss": 0.4204,
1059
+ "step": 1420
1060
+ },
1061
+ {
1062
+ "epoch": 3.602015113350126,
1063
+ "grad_norm": 1.3160916566848755,
1064
+ "learning_rate": 7.1719775471608025e-06,
1065
+ "loss": 0.4368,
1066
+ "step": 1430
1067
+ },
1068
+ {
1069
+ "epoch": 3.6272040302267,
1070
+ "grad_norm": 1.2061994075775146,
1071
+ "learning_rate": 7.136090212143447e-06,
1072
+ "loss": 0.4321,
1073
+ "step": 1440
1074
+ },
1075
+ {
1076
+ "epoch": 3.652392947103275,
1077
+ "grad_norm": 1.504187822341919,
1078
+ "learning_rate": 7.100067756124803e-06,
1079
+ "loss": 0.4576,
1080
+ "step": 1450
1081
+ },
1082
+ {
1083
+ "epoch": 3.677581863979849,
1084
+ "grad_norm": 1.499358057975769,
1085
+ "learning_rate": 7.063912457749426e-06,
1086
+ "loss": 0.443,
1087
+ "step": 1460
1088
+ },
1089
+ {
1090
+ "epoch": 3.702770780856423,
1091
+ "grad_norm": 1.3092291355133057,
1092
+ "learning_rate": 7.02762660406497e-06,
1093
+ "loss": 0.4369,
1094
+ "step": 1470
1095
+ },
1096
+ {
1097
+ "epoch": 3.7279596977329974,
1098
+ "grad_norm": 1.4406181573867798,
1099
+ "learning_rate": 6.991212490377532e-06,
1100
+ "loss": 0.4246,
1101
+ "step": 1480
1102
+ },
1103
+ {
1104
+ "epoch": 3.7531486146095716,
1105
+ "grad_norm": 1.5356957912445068,
1106
+ "learning_rate": 6.954672420106452e-06,
1107
+ "loss": 0.4307,
1108
+ "step": 1490
1109
+ },
1110
+ {
1111
+ "epoch": 3.7783375314861463,
1112
+ "grad_norm": 1.55060613155365,
1113
+ "learning_rate": 6.918008704638603e-06,
1114
+ "loss": 0.4304,
1115
+ "step": 1500
1116
+ },
1117
+ {
1118
+ "epoch": 3.8035264483627205,
1119
+ "grad_norm": 1.6245125532150269,
1120
+ "learning_rate": 6.8812236631821886e-06,
1121
+ "loss": 0.4817,
1122
+ "step": 1510
1123
+ },
1124
+ {
1125
+ "epoch": 3.8287153652392947,
1126
+ "grad_norm": 1.28324556350708,
1127
+ "learning_rate": 6.844319622620039e-06,
1128
+ "loss": 0.4646,
1129
+ "step": 1520
1130
+ },
1131
+ {
1132
+ "epoch": 3.853904282115869,
1133
+ "grad_norm": 1.7696768045425415,
1134
+ "learning_rate": 6.807298917362417e-06,
1135
+ "loss": 0.449,
1136
+ "step": 1530
1137
+ },
1138
+ {
1139
+ "epoch": 3.879093198992443,
1140
+ "grad_norm": 1.2908755540847778,
1141
+ "learning_rate": 6.7701638891993515e-06,
1142
+ "loss": 0.4498,
1143
+ "step": 1540
1144
+ },
1145
+ {
1146
+ "epoch": 3.9042821158690177,
1147
+ "grad_norm": 1.4110997915267944,
1148
+ "learning_rate": 6.732916887152508e-06,
1149
+ "loss": 0.4683,
1150
+ "step": 1550
1151
+ },
1152
+ {
1153
+ "epoch": 3.929471032745592,
1154
+ "grad_norm": 1.9216111898422241,
1155
+ "learning_rate": 6.695560267326599e-06,
1156
+ "loss": 0.4709,
1157
+ "step": 1560
1158
+ },
1159
+ {
1160
+ "epoch": 3.954659949622166,
1161
+ "grad_norm": 1.4406713247299194,
1162
+ "learning_rate": 6.65809639276034e-06,
1163
+ "loss": 0.4369,
1164
+ "step": 1570
1165
+ },
1166
+ {
1167
+ "epoch": 3.979848866498741,
1168
+ "grad_norm": 1.3992236852645874,
1169
+ "learning_rate": 6.62052763327698e-06,
1170
+ "loss": 0.4293,
1171
+ "step": 1580
1172
+ },
1173
+ {
1174
+ "epoch": 4.005037783375315,
1175
+ "grad_norm": 1.5759620666503906,
1176
+ "learning_rate": 6.582856365334389e-06,
1177
+ "loss": 0.4282,
1178
+ "step": 1590
1179
+ },
1180
+ {
1181
+ "epoch": 4.030226700251889,
1182
+ "grad_norm": 1.571846842765808,
1183
+ "learning_rate": 6.545084971874738e-06,
1184
+ "loss": 0.4058,
1185
+ "step": 1600
1186
+ },
1187
+ {
1188
+ "epoch": 4.030226700251889,
1189
+ "eval_loss": 0.45875710248947144,
1190
+ "eval_runtime": 13.8375,
1191
+ "eval_samples_per_second": 81.012,
1192
+ "eval_steps_per_second": 5.131,
1193
+ "step": 1600
1194
+ },
1195
+ {
1196
+ "epoch": 4.055415617128464,
1197
+ "grad_norm": 1.513992428779602,
1198
+ "learning_rate": 6.507215842173758e-06,
1199
+ "loss": 0.4402,
1200
+ "step": 1610
1201
+ },
1202
+ {
1203
+ "epoch": 4.080604534005038,
1204
+ "grad_norm": 1.4797428846359253,
1205
+ "learning_rate": 6.469251371689606e-06,
1206
+ "loss": 0.4431,
1207
+ "step": 1620
1208
+ },
1209
+ {
1210
+ "epoch": 4.105793450881612,
1211
+ "grad_norm": 1.7374969720840454,
1212
+ "learning_rate": 6.431193961911336e-06,
1213
+ "loss": 0.4348,
1214
+ "step": 1630
1215
+ },
1216
+ {
1217
+ "epoch": 4.130982367758186,
1218
+ "grad_norm": 2.12467885017395,
1219
+ "learning_rate": 6.393046020206995e-06,
1220
+ "loss": 0.4343,
1221
+ "step": 1640
1222
+ },
1223
+ {
1224
+ "epoch": 4.156171284634761,
1225
+ "grad_norm": 1.7614175081253052,
1226
+ "learning_rate": 6.354809959671331e-06,
1227
+ "loss": 0.4527,
1228
+ "step": 1650
1229
+ },
1230
+ {
1231
+ "epoch": 4.181360201511335,
1232
+ "grad_norm": 1.7423946857452393,
1233
+ "learning_rate": 6.316488198973162e-06,
1234
+ "loss": 0.4277,
1235
+ "step": 1660
1236
+ },
1237
+ {
1238
+ "epoch": 4.206549118387909,
1239
+ "grad_norm": 1.7474662065505981,
1240
+ "learning_rate": 6.278083162202374e-06,
1241
+ "loss": 0.4261,
1242
+ "step": 1670
1243
+ },
1244
+ {
1245
+ "epoch": 4.231738035264484,
1246
+ "grad_norm": 1.5940552949905396,
1247
+ "learning_rate": 6.239597278716581e-06,
1248
+ "loss": 0.4353,
1249
+ "step": 1680
1250
+ },
1251
+ {
1252
+ "epoch": 4.2569269521410575,
1253
+ "grad_norm": 1.4053945541381836,
1254
+ "learning_rate": 6.201032982987456e-06,
1255
+ "loss": 0.4406,
1256
+ "step": 1690
1257
+ },
1258
+ {
1259
+ "epoch": 4.282115869017632,
1260
+ "grad_norm": 1.8811612129211426,
1261
+ "learning_rate": 6.162392714446732e-06,
1262
+ "loss": 0.4265,
1263
+ "step": 1700
1264
+ },
1265
+ {
1266
+ "epoch": 4.307304785894207,
1267
+ "grad_norm": 1.6143121719360352,
1268
+ "learning_rate": 6.123678917331902e-06,
1269
+ "loss": 0.4273,
1270
+ "step": 1710
1271
+ },
1272
+ {
1273
+ "epoch": 4.332493702770781,
1274
+ "grad_norm": 1.4460248947143555,
1275
+ "learning_rate": 6.084894040531591e-06,
1276
+ "loss": 0.4205,
1277
+ "step": 1720
1278
+ },
1279
+ {
1280
+ "epoch": 4.357682619647355,
1281
+ "grad_norm": 1.889672875404358,
1282
+ "learning_rate": 6.046040537430662e-06,
1283
+ "loss": 0.4637,
1284
+ "step": 1730
1285
+ },
1286
+ {
1287
+ "epoch": 4.382871536523929,
1288
+ "grad_norm": 1.6197996139526367,
1289
+ "learning_rate": 6.007120865755013e-06,
1290
+ "loss": 0.4153,
1291
+ "step": 1740
1292
+ },
1293
+ {
1294
+ "epoch": 4.408060453400504,
1295
+ "grad_norm": 1.8213521242141724,
1296
+ "learning_rate": 5.968137487416123e-06,
1297
+ "loss": 0.4333,
1298
+ "step": 1750
1299
+ },
1300
+ {
1301
+ "epoch": 4.433249370277078,
1302
+ "grad_norm": 2.0262043476104736,
1303
+ "learning_rate": 5.9290928683553105e-06,
1304
+ "loss": 0.3902,
1305
+ "step": 1760
1306
+ },
1307
+ {
1308
+ "epoch": 4.458438287153652,
1309
+ "grad_norm": 1.5211721658706665,
1310
+ "learning_rate": 5.8899894783877536e-06,
1311
+ "loss": 0.4325,
1312
+ "step": 1770
1313
+ },
1314
+ {
1315
+ "epoch": 4.483627204030227,
1316
+ "grad_norm": 1.6259177923202515,
1317
+ "learning_rate": 5.8508297910462464e-06,
1318
+ "loss": 0.4076,
1319
+ "step": 1780
1320
+ },
1321
+ {
1322
+ "epoch": 4.508816120906801,
1323
+ "grad_norm": 1.8078111410140991,
1324
+ "learning_rate": 5.811616283424756e-06,
1325
+ "loss": 0.4301,
1326
+ "step": 1790
1327
+ },
1328
+ {
1329
+ "epoch": 4.534005037783375,
1330
+ "grad_norm": 1.658717155456543,
1331
+ "learning_rate": 5.772351436021706e-06,
1332
+ "loss": 0.4365,
1333
+ "step": 1800
1334
+ },
1335
+ {
1336
+ "epoch": 4.534005037783375,
1337
+ "eval_loss": 0.4558510482311249,
1338
+ "eval_runtime": 13.882,
1339
+ "eval_samples_per_second": 80.752,
1340
+ "eval_steps_per_second": 5.115,
1341
+ "step": 1800
1342
+ },
1343
+ {
1344
+ "epoch": 4.55919395465995,
1345
+ "grad_norm": 1.7079956531524658,
1346
+ "learning_rate": 5.733037732583091e-06,
1347
+ "loss": 0.43,
1348
+ "step": 1810
1349
+ },
1350
+ {
1351
+ "epoch": 4.584382871536524,
1352
+ "grad_norm": 1.8879334926605225,
1353
+ "learning_rate": 5.693677659945343e-06,
1354
+ "loss": 0.4092,
1355
+ "step": 1820
1356
+ },
1357
+ {
1358
+ "epoch": 4.609571788413098,
1359
+ "grad_norm": 2.1800103187561035,
1360
+ "learning_rate": 5.654273707878042e-06,
1361
+ "loss": 0.4115,
1362
+ "step": 1830
1363
+ },
1364
+ {
1365
+ "epoch": 4.634760705289673,
1366
+ "grad_norm": 1.8041812181472778,
1367
+ "learning_rate": 5.614828368926411e-06,
1368
+ "loss": 0.4145,
1369
+ "step": 1840
1370
+ },
1371
+ {
1372
+ "epoch": 4.659949622166247,
1373
+ "grad_norm": 1.4573403596878052,
1374
+ "learning_rate": 5.575344138253656e-06,
1375
+ "loss": 0.4281,
1376
+ "step": 1850
1377
+ },
1378
+ {
1379
+ "epoch": 4.685138539042821,
1380
+ "grad_norm": 1.9242876768112183,
1381
+ "learning_rate": 5.535823513483123e-06,
1382
+ "loss": 0.4359,
1383
+ "step": 1860
1384
+ },
1385
+ {
1386
+ "epoch": 4.710327455919396,
1387
+ "grad_norm": 1.4043676853179932,
1388
+ "learning_rate": 5.496268994540309e-06,
1389
+ "loss": 0.4202,
1390
+ "step": 1870
1391
+ },
1392
+ {
1393
+ "epoch": 4.73551637279597,
1394
+ "grad_norm": 1.5964053869247437,
1395
+ "learning_rate": 5.456683083494731e-06,
1396
+ "loss": 0.4154,
1397
+ "step": 1880
1398
+ },
1399
+ {
1400
+ "epoch": 4.760705289672544,
1401
+ "grad_norm": 1.8533573150634766,
1402
+ "learning_rate": 5.417068284401655e-06,
1403
+ "loss": 0.4411,
1404
+ "step": 1890
1405
+ },
1406
+ {
1407
+ "epoch": 4.785894206549118,
1408
+ "grad_norm": 1.8764283657073975,
1409
+ "learning_rate": 5.37742710314369e-06,
1410
+ "loss": 0.409,
1411
+ "step": 1900
1412
+ },
1413
+ {
1414
+ "epoch": 4.811083123425693,
1415
+ "grad_norm": 1.7626798152923584,
1416
+ "learning_rate": 5.337762047272282e-06,
1417
+ "loss": 0.4551,
1418
+ "step": 1910
1419
+ },
1420
+ {
1421
+ "epoch": 4.836272040302267,
1422
+ "grad_norm": 1.6848845481872559,
1423
+ "learning_rate": 5.2980756258491e-06,
1424
+ "loss": 0.4151,
1425
+ "step": 1920
1426
+ },
1427
+ {
1428
+ "epoch": 4.861460957178841,
1429
+ "grad_norm": 2.0091805458068848,
1430
+ "learning_rate": 5.258370349287312e-06,
1431
+ "loss": 0.4105,
1432
+ "step": 1930
1433
+ },
1434
+ {
1435
+ "epoch": 4.886649874055416,
1436
+ "grad_norm": 2.010892152786255,
1437
+ "learning_rate": 5.2186487291927935e-06,
1438
+ "loss": 0.4297,
1439
+ "step": 1940
1440
+ },
1441
+ {
1442
+ "epoch": 4.91183879093199,
1443
+ "grad_norm": 1.9532036781311035,
1444
+ "learning_rate": 5.178913278205248e-06,
1445
+ "loss": 0.4639,
1446
+ "step": 1950
1447
+ },
1448
+ {
1449
+ "epoch": 4.937027707808564,
1450
+ "grad_norm": 1.8438720703125,
1451
+ "learning_rate": 5.139166509839271e-06,
1452
+ "loss": 0.4213,
1453
+ "step": 1960
1454
+ },
1455
+ {
1456
+ "epoch": 4.962216624685139,
1457
+ "grad_norm": 1.8575098514556885,
1458
+ "learning_rate": 5.099410938325351e-06,
1459
+ "loss": 0.4294,
1460
+ "step": 1970
1461
+ },
1462
+ {
1463
+ "epoch": 4.987405541561713,
1464
+ "grad_norm": 2.0508291721343994,
1465
+ "learning_rate": 5.059649078450834e-06,
1466
+ "loss": 0.4139,
1467
+ "step": 1980
1468
+ },
1469
+ {
1470
+ "epoch": 5.012594458438287,
1471
+ "grad_norm": 1.7331637144088745,
1472
+ "learning_rate": 5.019883445400838e-06,
1473
+ "loss": 0.397,
1474
+ "step": 1990
1475
+ },
1476
+ {
1477
+ "epoch": 5.037783375314861,
1478
+ "grad_norm": 1.7275443077087402,
1479
+ "learning_rate": 4.980116554599164e-06,
1480
+ "loss": 0.4189,
1481
+ "step": 2000
1482
+ },
1483
+ {
1484
+ "epoch": 5.037783375314861,
1485
+ "eval_loss": 0.4556237757205963,
1486
+ "eval_runtime": 13.7642,
1487
+ "eval_samples_per_second": 81.443,
1488
+ "eval_steps_per_second": 5.158,
1489
+ "step": 2000
1490
+ },
1491
+ {
1492
+ "epoch": 5.062972292191436,
1493
+ "grad_norm": 1.9435149431228638,
1494
+ "learning_rate": 4.940350921549167e-06,
1495
+ "loss": 0.397,
1496
+ "step": 2010
1497
+ },
1498
+ {
1499
+ "epoch": 5.08816120906801,
1500
+ "grad_norm": 1.9411457777023315,
1501
+ "learning_rate": 4.900589061674649e-06,
1502
+ "loss": 0.4099,
1503
+ "step": 2020
1504
+ },
1505
+ {
1506
+ "epoch": 5.113350125944584,
1507
+ "grad_norm": 1.9551879167556763,
1508
+ "learning_rate": 4.86083349016073e-06,
1509
+ "loss": 0.3904,
1510
+ "step": 2030
1511
+ },
1512
+ {
1513
+ "epoch": 5.138539042821159,
1514
+ "grad_norm": 1.8461116552352905,
1515
+ "learning_rate": 4.821086721794754e-06,
1516
+ "loss": 0.4301,
1517
+ "step": 2040
1518
+ },
1519
+ {
1520
+ "epoch": 5.163727959697733,
1521
+ "grad_norm": 2.5992743968963623,
1522
+ "learning_rate": 4.781351270807208e-06,
1523
+ "loss": 0.3907,
1524
+ "step": 2050
1525
+ },
1526
+ {
1527
+ "epoch": 5.188916876574307,
1528
+ "grad_norm": 1.6665948629379272,
1529
+ "learning_rate": 4.74162965071269e-06,
1530
+ "loss": 0.3786,
1531
+ "step": 2060
1532
+ },
1533
+ {
1534
+ "epoch": 5.214105793450882,
1535
+ "grad_norm": 1.8993297815322876,
1536
+ "learning_rate": 4.701924374150901e-06,
1537
+ "loss": 0.4028,
1538
+ "step": 2070
1539
+ },
1540
+ {
1541
+ "epoch": 5.239294710327456,
1542
+ "grad_norm": 1.8785158395767212,
1543
+ "learning_rate": 4.6622379527277195e-06,
1544
+ "loss": 0.39,
1545
+ "step": 2080
1546
+ },
1547
+ {
1548
+ "epoch": 5.26448362720403,
1549
+ "grad_norm": 2.043282985687256,
1550
+ "learning_rate": 4.6225728968563126e-06,
1551
+ "loss": 0.4067,
1552
+ "step": 2090
1553
+ },
1554
+ {
1555
+ "epoch": 5.289672544080605,
1556
+ "grad_norm": 1.9629155397415161,
1557
+ "learning_rate": 4.582931715598346e-06,
1558
+ "loss": 0.4345,
1559
+ "step": 2100
1560
+ },
1561
+ {
1562
+ "epoch": 5.314861460957179,
1563
+ "grad_norm": 1.8402982950210571,
1564
+ "learning_rate": 4.543316916505269e-06,
1565
+ "loss": 0.3994,
1566
+ "step": 2110
1567
+ },
1568
+ {
1569
+ "epoch": 5.340050377833753,
1570
+ "grad_norm": 2.57621169090271,
1571
+ "learning_rate": 4.5037310054596936e-06,
1572
+ "loss": 0.4218,
1573
+ "step": 2120
1574
+ },
1575
+ {
1576
+ "epoch": 5.365239294710327,
1577
+ "grad_norm": 1.8353244066238403,
1578
+ "learning_rate": 4.46417648651688e-06,
1579
+ "loss": 0.3875,
1580
+ "step": 2130
1581
+ },
1582
+ {
1583
+ "epoch": 5.390428211586902,
1584
+ "grad_norm": 1.7337638139724731,
1585
+ "learning_rate": 4.4246558617463445e-06,
1586
+ "loss": 0.4069,
1587
+ "step": 2140
1588
+ },
1589
+ {
1590
+ "epoch": 5.415617128463476,
1591
+ "grad_norm": 2.020390033721924,
1592
+ "learning_rate": 4.38517163107359e-06,
1593
+ "loss": 0.4043,
1594
+ "step": 2150
1595
+ },
1596
+ {
1597
+ "epoch": 5.44080604534005,
1598
+ "grad_norm": 1.8292697668075562,
1599
+ "learning_rate": 4.34572629212196e-06,
1600
+ "loss": 0.3962,
1601
+ "step": 2160
1602
+ },
1603
+ {
1604
+ "epoch": 5.465994962216625,
1605
+ "grad_norm": 1.7365460395812988,
1606
+ "learning_rate": 4.30632234005466e-06,
1607
+ "loss": 0.4041,
1608
+ "step": 2170
1609
+ },
1610
+ {
1611
+ "epoch": 5.491183879093199,
1612
+ "grad_norm": 1.6416752338409424,
1613
+ "learning_rate": 4.266962267416911e-06,
1614
+ "loss": 0.4248,
1615
+ "step": 2180
1616
+ },
1617
+ {
1618
+ "epoch": 5.516372795969773,
1619
+ "grad_norm": 1.973142385482788,
1620
+ "learning_rate": 4.227648563978294e-06,
1621
+ "loss": 0.4181,
1622
+ "step": 2190
1623
+ },
1624
+ {
1625
+ "epoch": 5.541561712846348,
1626
+ "grad_norm": 1.8644850254058838,
1627
+ "learning_rate": 4.188383716575246e-06,
1628
+ "loss": 0.4096,
1629
+ "step": 2200
1630
+ },
1631
+ {
1632
+ "epoch": 5.541561712846348,
1633
+ "eval_loss": 0.4560880959033966,
1634
+ "eval_runtime": 13.2946,
1635
+ "eval_samples_per_second": 84.32,
1636
+ "eval_steps_per_second": 5.341,
1637
+ "step": 2200
1638
+ },
1639
+ {
1640
+ "epoch": 5.566750629722922,
1641
+ "grad_norm": 1.9540727138519287,
1642
+ "learning_rate": 4.149170208953756e-06,
1643
+ "loss": 0.4009,
1644
+ "step": 2210
1645
+ },
1646
+ {
1647
+ "epoch": 5.591939546599496,
1648
+ "grad_norm": 1.833165168762207,
1649
+ "learning_rate": 4.11001052161225e-06,
1650
+ "loss": 0.4022,
1651
+ "step": 2220
1652
+ },
1653
+ {
1654
+ "epoch": 5.617128463476071,
1655
+ "grad_norm": 1.9179524183273315,
1656
+ "learning_rate": 4.07090713164469e-06,
1657
+ "loss": 0.426,
1658
+ "step": 2230
1659
+ },
1660
+ {
1661
+ "epoch": 5.642317380352645,
1662
+ "grad_norm": 2.160522699356079,
1663
+ "learning_rate": 4.0318625125838774e-06,
1664
+ "loss": 0.4285,
1665
+ "step": 2240
1666
+ },
1667
+ {
1668
+ "epoch": 5.667506297229219,
1669
+ "grad_norm": 1.8184573650360107,
1670
+ "learning_rate": 3.992879134244988e-06,
1671
+ "loss": 0.3965,
1672
+ "step": 2250
1673
+ },
1674
+ {
1675
+ "epoch": 5.692695214105793,
1676
+ "grad_norm": 2.1025454998016357,
1677
+ "learning_rate": 3.95395946256934e-06,
1678
+ "loss": 0.433,
1679
+ "step": 2260
1680
+ },
1681
+ {
1682
+ "epoch": 5.717884130982368,
1683
+ "grad_norm": 2.0316598415374756,
1684
+ "learning_rate": 3.91510595946841e-06,
1685
+ "loss": 0.4119,
1686
+ "step": 2270
1687
+ },
1688
+ {
1689
+ "epoch": 5.7430730478589425,
1690
+ "grad_norm": 2.0941784381866455,
1691
+ "learning_rate": 3.876321082668098e-06,
1692
+ "loss": 0.4023,
1693
+ "step": 2280
1694
+ },
1695
+ {
1696
+ "epoch": 5.768261964735516,
1697
+ "grad_norm": 1.9173330068588257,
1698
+ "learning_rate": 3.837607285553269e-06,
1699
+ "loss": 0.4085,
1700
+ "step": 2290
1701
+ },
1702
+ {
1703
+ "epoch": 5.793450881612091,
1704
+ "grad_norm": 1.894081473350525,
1705
+ "learning_rate": 3.7989670170125463e-06,
1706
+ "loss": 0.4306,
1707
+ "step": 2300
1708
+ },
1709
+ {
1710
+ "epoch": 5.818639798488665,
1711
+ "grad_norm": 2.006089448928833,
1712
+ "learning_rate": 3.7604027212834202e-06,
1713
+ "loss": 0.4222,
1714
+ "step": 2310
1715
+ },
1716
+ {
1717
+ "epoch": 5.843828715365239,
1718
+ "grad_norm": 2.07391095161438,
1719
+ "learning_rate": 3.721916837797627e-06,
1720
+ "loss": 0.4235,
1721
+ "step": 2320
1722
+ },
1723
+ {
1724
+ "epoch": 5.869017632241814,
1725
+ "grad_norm": 2.0835301876068115,
1726
+ "learning_rate": 3.6835118010268394e-06,
1727
+ "loss": 0.412,
1728
+ "step": 2330
1729
+ },
1730
+ {
1731
+ "epoch": 5.894206549118388,
1732
+ "grad_norm": 1.8720184564590454,
1733
+ "learning_rate": 3.64519004032867e-06,
1734
+ "loss": 0.4299,
1735
+ "step": 2340
1736
+ },
1737
+ {
1738
+ "epoch": 5.919395465994962,
1739
+ "grad_norm": 1.6110793352127075,
1740
+ "learning_rate": 3.6069539797930075e-06,
1741
+ "loss": 0.4012,
1742
+ "step": 2350
1743
+ },
1744
+ {
1745
+ "epoch": 5.944584382871536,
1746
+ "grad_norm": 2.09148907661438,
1747
+ "learning_rate": 3.5688060380886646e-06,
1748
+ "loss": 0.4093,
1749
+ "step": 2360
1750
+ },
1751
+ {
1752
+ "epoch": 5.969773299748111,
1753
+ "grad_norm": 1.8184216022491455,
1754
+ "learning_rate": 3.5307486283103966e-06,
1755
+ "loss": 0.4238,
1756
+ "step": 2370
1757
+ },
1758
+ {
1759
+ "epoch": 5.994962216624685,
1760
+ "grad_norm": 1.7682832479476929,
1761
+ "learning_rate": 3.4927841578262445e-06,
1762
+ "loss": 0.4038,
1763
+ "step": 2380
1764
+ },
1765
+ {
1766
+ "epoch": 6.020151133501259,
1767
+ "grad_norm": 1.789150595664978,
1768
+ "learning_rate": 3.4549150281252635e-06,
1769
+ "loss": 0.386,
1770
+ "step": 2390
1771
+ },
1772
+ {
1773
+ "epoch": 6.045340050377834,
1774
+ "grad_norm": 1.9353408813476562,
1775
+ "learning_rate": 3.417143634665613e-06,
1776
+ "loss": 0.4087,
1777
+ "step": 2400
1778
+ },
1779
+ {
1780
+ "epoch": 6.045340050377834,
1781
+ "eval_loss": 0.4591861665248871,
1782
+ "eval_runtime": 13.3101,
1783
+ "eval_samples_per_second": 84.222,
1784
+ "eval_steps_per_second": 5.334,
1785
+ "step": 2400
1786
+ },
1787
+ {
1788
+ "epoch": 6.0705289672544085,
1789
+ "grad_norm": 2.111058235168457,
1790
+ "learning_rate": 3.3794723667230213e-06,
1791
+ "loss": 0.3937,
1792
+ "step": 2410
1793
+ },
1794
+ {
1795
+ "epoch": 6.095717884130982,
1796
+ "grad_norm": 2.249135971069336,
1797
+ "learning_rate": 3.3419036072396614e-06,
1798
+ "loss": 0.3928,
1799
+ "step": 2420
1800
+ },
1801
+ {
1802
+ "epoch": 6.120906801007557,
1803
+ "grad_norm": 1.9155455827713013,
1804
+ "learning_rate": 3.304439732673402e-06,
1805
+ "loss": 0.3725,
1806
+ "step": 2430
1807
+ },
1808
+ {
1809
+ "epoch": 6.146095717884131,
1810
+ "grad_norm": 1.9998152256011963,
1811
+ "learning_rate": 3.2670831128474922e-06,
1812
+ "loss": 0.3994,
1813
+ "step": 2440
1814
+ },
1815
+ {
1816
+ "epoch": 6.171284634760705,
1817
+ "grad_norm": 2.3477842807769775,
1818
+ "learning_rate": 3.2298361108006506e-06,
1819
+ "loss": 0.3785,
1820
+ "step": 2450
1821
+ },
1822
+ {
1823
+ "epoch": 6.19647355163728,
1824
+ "grad_norm": 2.088125467300415,
1825
+ "learning_rate": 3.192701082637585e-06,
1826
+ "loss": 0.3805,
1827
+ "step": 2460
1828
+ },
1829
+ {
1830
+ "epoch": 6.221662468513854,
1831
+ "grad_norm": 1.9425089359283447,
1832
+ "learning_rate": 3.1556803773799616e-06,
1833
+ "loss": 0.4319,
1834
+ "step": 2470
1835
+ },
1836
+ {
1837
+ "epoch": 6.246851385390428,
1838
+ "grad_norm": 2.260859251022339,
1839
+ "learning_rate": 3.1187763368178127e-06,
1840
+ "loss": 0.3983,
1841
+ "step": 2480
1842
+ },
1843
+ {
1844
+ "epoch": 6.272040302267002,
1845
+ "grad_norm": 2.5730531215667725,
1846
+ "learning_rate": 3.0819912953613982e-06,
1847
+ "loss": 0.4043,
1848
+ "step": 2490
1849
+ },
1850
+ {
1851
+ "epoch": 6.297229219143577,
1852
+ "grad_norm": 2.194801092147827,
1853
+ "learning_rate": 3.04532757989355e-06,
1854
+ "loss": 0.4194,
1855
+ "step": 2500
1856
+ },
1857
+ {
1858
+ "epoch": 6.3224181360201515,
1859
+ "grad_norm": 2.1186110973358154,
1860
+ "learning_rate": 3.0087875096224674e-06,
1861
+ "loss": 0.4037,
1862
+ "step": 2510
1863
+ },
1864
+ {
1865
+ "epoch": 6.347607052896725,
1866
+ "grad_norm": 2.276244640350342,
1867
+ "learning_rate": 2.972373395935031e-06,
1868
+ "loss": 0.3713,
1869
+ "step": 2520
1870
+ },
1871
+ {
1872
+ "epoch": 6.3727959697733,
1873
+ "grad_norm": 2.3139798641204834,
1874
+ "learning_rate": 2.936087542250577e-06,
1875
+ "loss": 0.3911,
1876
+ "step": 2530
1877
+ },
1878
+ {
1879
+ "epoch": 6.3979848866498745,
1880
+ "grad_norm": 1.722158432006836,
1881
+ "learning_rate": 2.8999322438751974e-06,
1882
+ "loss": 0.4214,
1883
+ "step": 2540
1884
+ },
1885
+ {
1886
+ "epoch": 6.423173803526448,
1887
+ "grad_norm": 1.8280166387557983,
1888
+ "learning_rate": 2.863909787856555e-06,
1889
+ "loss": 0.3964,
1890
+ "step": 2550
1891
+ },
1892
+ {
1893
+ "epoch": 6.448362720403023,
1894
+ "grad_norm": 2.400949001312256,
1895
+ "learning_rate": 2.828022452839201e-06,
1896
+ "loss": 0.414,
1897
+ "step": 2560
1898
+ },
1899
+ {
1900
+ "epoch": 6.473551637279597,
1901
+ "grad_norm": 2.3635735511779785,
1902
+ "learning_rate": 2.792272508920443e-06,
1903
+ "loss": 0.3914,
1904
+ "step": 2570
1905
+ },
1906
+ {
1907
+ "epoch": 6.498740554156171,
1908
+ "grad_norm": 2.232980966567993,
1909
+ "learning_rate": 2.756662217506745e-06,
1910
+ "loss": 0.3979,
1911
+ "step": 2580
1912
+ },
1913
+ {
1914
+ "epoch": 6.523929471032746,
1915
+ "grad_norm": 2.0884628295898438,
1916
+ "learning_rate": 2.7211938311706886e-06,
1917
+ "loss": 0.4186,
1918
+ "step": 2590
1919
+ },
1920
+ {
1921
+ "epoch": 6.54911838790932,
1922
+ "grad_norm": 2.307166576385498,
1923
+ "learning_rate": 2.6858695935084676e-06,
1924
+ "loss": 0.4119,
1925
+ "step": 2600
1926
+ },
1927
+ {
1928
+ "epoch": 6.54911838790932,
1929
+ "eval_loss": 0.4568886458873749,
1930
+ "eval_runtime": 13.3021,
1931
+ "eval_samples_per_second": 84.272,
1932
+ "eval_steps_per_second": 5.338,
1933
+ "step": 2600
1934
+ },
1935
+ {
1936
+ "epoch": 6.574307304785894,
1937
+ "grad_norm": 2.212416172027588,
1938
+ "learning_rate": 2.6506917389979782e-06,
1939
+ "loss": 0.3928,
1940
+ "step": 2610
1941
+ },
1942
+ {
1943
+ "epoch": 6.599496221662468,
1944
+ "grad_norm": 2.0265328884124756,
1945
+ "learning_rate": 2.615662492857471e-06,
1946
+ "loss": 0.4012,
1947
+ "step": 2620
1948
+ },
1949
+ {
1950
+ "epoch": 6.624685138539043,
1951
+ "grad_norm": 1.8635451793670654,
1952
+ "learning_rate": 2.5807840709047916e-06,
1953
+ "loss": 0.3903,
1954
+ "step": 2630
1955
+ },
1956
+ {
1957
+ "epoch": 6.6498740554156175,
1958
+ "grad_norm": 2.164684772491455,
1959
+ "learning_rate": 2.546058679417217e-06,
1960
+ "loss": 0.3874,
1961
+ "step": 2640
1962
+ },
1963
+ {
1964
+ "epoch": 6.675062972292191,
1965
+ "grad_norm": 2.1691222190856934,
1966
+ "learning_rate": 2.5114885149918943e-06,
1967
+ "loss": 0.4104,
1968
+ "step": 2650
1969
+ },
1970
+ {
1971
+ "epoch": 6.700251889168766,
1972
+ "grad_norm": 2.239158868789673,
1973
+ "learning_rate": 2.477075764406898e-06,
1974
+ "loss": 0.4219,
1975
+ "step": 2660
1976
+ },
1977
+ {
1978
+ "epoch": 6.72544080604534,
1979
+ "grad_norm": 2.103170394897461,
1980
+ "learning_rate": 2.4428226044828896e-06,
1981
+ "loss": 0.4036,
1982
+ "step": 2670
1983
+ },
1984
+ {
1985
+ "epoch": 6.750629722921914,
1986
+ "grad_norm": 2.0689163208007812,
1987
+ "learning_rate": 2.4087312019454322e-06,
1988
+ "loss": 0.3726,
1989
+ "step": 2680
1990
+ },
1991
+ {
1992
+ "epoch": 6.775818639798489,
1993
+ "grad_norm": 2.340116262435913,
1994
+ "learning_rate": 2.3748037132879246e-06,
1995
+ "loss": 0.3707,
1996
+ "step": 2690
1997
+ },
1998
+ {
1999
+ "epoch": 6.801007556675063,
2000
+ "grad_norm": 2.1788370609283447,
2001
+ "learning_rate": 2.341042284635201e-06,
2002
+ "loss": 0.3926,
2003
+ "step": 2700
2004
+ },
2005
+ {
2006
+ "epoch": 6.826196473551637,
2007
+ "grad_norm": 2.143419027328491,
2008
+ "learning_rate": 2.3074490516077536e-06,
2009
+ "loss": 0.4061,
2010
+ "step": 2710
2011
+ },
2012
+ {
2013
+ "epoch": 6.851385390428211,
2014
+ "grad_norm": 1.8408520221710205,
2015
+ "learning_rate": 2.2740261391866634e-06,
2016
+ "loss": 0.3985,
2017
+ "step": 2720
2018
+ },
2019
+ {
2020
+ "epoch": 6.876574307304786,
2021
+ "grad_norm": 1.825348973274231,
2022
+ "learning_rate": 2.2407756615791693e-06,
2023
+ "loss": 0.3937,
2024
+ "step": 2730
2025
+ },
2026
+ {
2027
+ "epoch": 6.9017632241813605,
2028
+ "grad_norm": 1.907293677330017,
2029
+ "learning_rate": 2.207699722084939e-06,
2030
+ "loss": 0.3807,
2031
+ "step": 2740
2032
+ },
2033
+ {
2034
+ "epoch": 6.926952141057934,
2035
+ "grad_norm": 2.277975559234619,
2036
+ "learning_rate": 2.174800412963014e-06,
2037
+ "loss": 0.4045,
2038
+ "step": 2750
2039
+ },
2040
+ {
2041
+ "epoch": 6.952141057934509,
2042
+ "grad_norm": 2.0400006771087646,
2043
+ "learning_rate": 2.1420798152994676e-06,
2044
+ "loss": 0.4135,
2045
+ "step": 2760
2046
+ },
2047
+ {
2048
+ "epoch": 6.977329974811083,
2049
+ "grad_norm": 1.9929348230361938,
2050
+ "learning_rate": 2.1095399988757574e-06,
2051
+ "loss": 0.3883,
2052
+ "step": 2770
2053
+ },
2054
+ {
2055
+ "epoch": 7.002518891687657,
2056
+ "grad_norm": 1.9930126667022705,
2057
+ "learning_rate": 2.0771830220378114e-06,
2058
+ "loss": 0.3811,
2059
+ "step": 2780
2060
+ },
2061
+ {
2062
+ "epoch": 7.027707808564232,
2063
+ "grad_norm": 2.040339231491089,
2064
+ "learning_rate": 2.045010931565804e-06,
2065
+ "loss": 0.3993,
2066
+ "step": 2790
2067
+ },
2068
+ {
2069
+ "epoch": 7.052896725440806,
2070
+ "grad_norm": 2.6354734897613525,
2071
+ "learning_rate": 2.0130257625447016e-06,
2072
+ "loss": 0.4103,
2073
+ "step": 2800
2074
+ },
2075
+ {
2076
+ "epoch": 7.052896725440806,
2077
+ "eval_loss": 0.46071410179138184,
2078
+ "eval_runtime": 13.295,
2079
+ "eval_samples_per_second": 84.317,
2080
+ "eval_steps_per_second": 5.34,
2081
+ "step": 2800
2082
+ },
2083
+ {
2084
+ "epoch": 7.052896725440806,
2085
+ "step": 2800,
2086
+ "total_flos": 5.301631426725151e+17,
2087
+ "train_loss": 0.4707381465605327,
2088
+ "train_runtime": 2143.7031,
2089
+ "train_samples_per_second": 29.631,
2090
+ "train_steps_per_second": 1.852
2091
+ }
2092
+ ],
2093
+ "logging_steps": 10,
2094
+ "max_steps": 3970,
2095
+ "num_input_tokens_seen": 0,
2096
+ "num_train_epochs": 10,
2097
+ "save_steps": 1000,
2098
+ "total_flos": 5.301631426725151e+17,
2099
+ "train_batch_size": 8,
2100
+ "trial_name": null,
2101
+ "trial_params": null
2102
+ }
llama3_8b_peft/gsm8k/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3f783cbbc9c06c4427954210fc9fce57632bdb8af4d96c7a02986940d8d66a
3
+ size 5176
llama3_8b_peft/gsm8k/training_eval_loss.png ADDED
llama3_8b_peft/gsm8k/training_loss.png ADDED
llama3_8b_peft/logical_deduction/README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: /data1/model/llama3/unsloth/Llama3-8b
9
+ model-index:
10
+ - name: logical_deduction_no_sys
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # logical_deduction_no_sys
18
+
19
+ This model is a fine-tuned version of [/data1/model/llama3/unsloth/Llama3-8b](https://huggingface.co//data1/model/llama3/unsloth/Llama3-8b) on the logical_deduction_no_sys dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0387
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 5e-05
41
+ - train_batch_size: 4
42
+ - eval_batch_size: 4
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 2
46
+ - total_train_batch_size: 8
47
+ - total_eval_batch_size: 8
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: cosine
50
+ - lr_scheduler_warmup_steps: 20
51
+ - num_epochs: 5.0
52
+
53
+ ### Training results
54
+
55
+ | Training Loss | Epoch | Step | Validation Loss |
56
+ |:-------------:|:------:|:----:|:---------------:|
57
+ | 0.0669 | 0.6993 | 100 | 0.0708 |
58
+ | 0.0685 | 1.3986 | 200 | 0.0929 |
59
+ | 0.0358 | 2.0979 | 300 | 0.0551 |
60
+ | 0.032 | 2.7972 | 400 | 0.0387 |
61
+ | 0.023 | 3.4965 | 500 | 0.0682 |
62
+ | 0.01 | 4.1958 | 600 | 0.0638 |
63
+ | 0.0131 | 4.8951 | 700 | 0.0673 |
64
+
65
+
66
+ ### Framework versions
67
+
68
+ - PEFT 0.10.0
69
+ - Transformers 4.40.0
70
+ - Pytorch 2.2.1
71
+ - Datasets 2.18.0
72
+ - Tokenizers 0.19.1
llama3_8b_peft/logical_deduction/adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/data1/model/llama3/unsloth/Llama3-8b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "up_proj",
25
+ "o_proj",
26
+ "down_proj",
27
+ "v_proj",
28
+ "k_proj",
29
+ "gate_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
llama3_8b_peft/logical_deduction/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:503364365e802c19f14662319558560b7195ef6bf7a5ce5c20029d00d8f67778
3
+ size 83945296
llama3_8b_peft/logical_deduction/all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.895104895104895,
3
+ "eval_loss": 0.038662172853946686,
4
+ "eval_runtime": 0.6762,
5
+ "eval_samples_per_second": 88.729,
6
+ "eval_steps_per_second": 11.83,
7
+ "total_flos": 5.522105259419238e+16,
8
+ "train_loss": 0.06722456459222095,
9
+ "train_runtime": 227.71,
10
+ "train_samples_per_second": 25.032,
11
+ "train_steps_per_second": 3.14
12
+ }
llama3_8b_peft/logical_deduction/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.895104895104895,
3
+ "eval_loss": 0.038662172853946686,
4
+ "eval_runtime": 0.6762,
5
+ "eval_samples_per_second": 88.729,
6
+ "eval_steps_per_second": 11.83
7
+ }
llama3_8b_peft/logical_deduction/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_of_text|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|end_of_text|>"
17
+ }
llama3_8b_peft/logical_deduction/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
llama3_8b_peft/logical_deduction/tokenizer_config.json ADDED
@@ -0,0 +1,2065 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "128000": {
4
+ "content": "<|begin_of_text|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "128001": {
12
+ "content": "<|end_of_text|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "128002": {
20
+ "content": "<|reserved_special_token_0|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128003": {
28
+ "content": "<|reserved_special_token_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128004": {
36
+ "content": "<|reserved_special_token_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128005": {
44
+ "content": "<|reserved_special_token_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128006": {
52
+ "content": "<|start_header_id|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128007": {
60
+ "content": "<|end_header_id|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128008": {
68
+ "content": "<|reserved_special_token_4|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128009": {
76
+ "content": "<|eot_id|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128010": {
84
+ "content": "<|reserved_special_token_5|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128011": {
92
+ "content": "<|reserved_special_token_6|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128012": {
100
+ "content": "<|reserved_special_token_7|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128013": {
108
+ "content": "<|reserved_special_token_8|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128014": {
116
+ "content": "<|reserved_special_token_9|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128015": {
124
+ "content": "<|reserved_special_token_10|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128016": {
132
+ "content": "<|reserved_special_token_11|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128017": {
140
+ "content": "<|reserved_special_token_12|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128018": {
148
+ "content": "<|reserved_special_token_13|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128019": {
156
+ "content": "<|reserved_special_token_14|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128020": {
164
+ "content": "<|reserved_special_token_15|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128021": {
172
+ "content": "<|reserved_special_token_16|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128022": {
180
+ "content": "<|reserved_special_token_17|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128023": {
188
+ "content": "<|reserved_special_token_18|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128024": {
196
+ "content": "<|reserved_special_token_19|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128025": {
204
+ "content": "<|reserved_special_token_20|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128026": {
212
+ "content": "<|reserved_special_token_21|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128027": {
220
+ "content": "<|reserved_special_token_22|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128028": {
228
+ "content": "<|reserved_special_token_23|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128029": {
236
+ "content": "<|reserved_special_token_24|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128030": {
244
+ "content": "<|reserved_special_token_25|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128031": {
252
+ "content": "<|reserved_special_token_26|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128032": {
260
+ "content": "<|reserved_special_token_27|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128033": {
268
+ "content": "<|reserved_special_token_28|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128034": {
276
+ "content": "<|reserved_special_token_29|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128035": {
284
+ "content": "<|reserved_special_token_30|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128036": {
292
+ "content": "<|reserved_special_token_31|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128037": {
300
+ "content": "<|reserved_special_token_32|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128038": {
308
+ "content": "<|reserved_special_token_33|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128039": {
316
+ "content": "<|reserved_special_token_34|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128040": {
324
+ "content": "<|reserved_special_token_35|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128041": {
332
+ "content": "<|reserved_special_token_36|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128042": {
340
+ "content": "<|reserved_special_token_37|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128043": {
348
+ "content": "<|reserved_special_token_38|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128044": {
356
+ "content": "<|reserved_special_token_39|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128045": {
364
+ "content": "<|reserved_special_token_40|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128046": {
372
+ "content": "<|reserved_special_token_41|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128047": {
380
+ "content": "<|reserved_special_token_42|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128048": {
388
+ "content": "<|reserved_special_token_43|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128049": {
396
+ "content": "<|reserved_special_token_44|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128050": {
404
+ "content": "<|reserved_special_token_45|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128051": {
412
+ "content": "<|reserved_special_token_46|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128052": {
420
+ "content": "<|reserved_special_token_47|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128053": {
428
+ "content": "<|reserved_special_token_48|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128054": {
436
+ "content": "<|reserved_special_token_49|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128055": {
444
+ "content": "<|reserved_special_token_50|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128056": {
452
+ "content": "<|reserved_special_token_51|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128057": {
460
+ "content": "<|reserved_special_token_52|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128058": {
468
+ "content": "<|reserved_special_token_53|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128059": {
476
+ "content": "<|reserved_special_token_54|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128060": {
484
+ "content": "<|reserved_special_token_55|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128061": {
492
+ "content": "<|reserved_special_token_56|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128062": {
500
+ "content": "<|reserved_special_token_57|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128063": {
508
+ "content": "<|reserved_special_token_58|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128064": {
516
+ "content": "<|reserved_special_token_59|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128065": {
524
+ "content": "<|reserved_special_token_60|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128066": {
532
+ "content": "<|reserved_special_token_61|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128067": {
540
+ "content": "<|reserved_special_token_62|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128068": {
548
+ "content": "<|reserved_special_token_63|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128069": {
556
+ "content": "<|reserved_special_token_64|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128070": {
564
+ "content": "<|reserved_special_token_65|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128071": {
572
+ "content": "<|reserved_special_token_66|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128072": {
580
+ "content": "<|reserved_special_token_67|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128073": {
588
+ "content": "<|reserved_special_token_68|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128074": {
596
+ "content": "<|reserved_special_token_69|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128075": {
604
+ "content": "<|reserved_special_token_70|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128076": {
612
+ "content": "<|reserved_special_token_71|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128077": {
620
+ "content": "<|reserved_special_token_72|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128078": {
628
+ "content": "<|reserved_special_token_73|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128079": {
636
+ "content": "<|reserved_special_token_74|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128080": {
644
+ "content": "<|reserved_special_token_75|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128081": {
652
+ "content": "<|reserved_special_token_76|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128082": {
660
+ "content": "<|reserved_special_token_77|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128083": {
668
+ "content": "<|reserved_special_token_78|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128084": {
676
+ "content": "<|reserved_special_token_79|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128085": {
684
+ "content": "<|reserved_special_token_80|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128086": {
692
+ "content": "<|reserved_special_token_81|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128087": {
700
+ "content": "<|reserved_special_token_82|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128088": {
708
+ "content": "<|reserved_special_token_83|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128089": {
716
+ "content": "<|reserved_special_token_84|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128090": {
724
+ "content": "<|reserved_special_token_85|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128091": {
732
+ "content": "<|reserved_special_token_86|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128092": {
740
+ "content": "<|reserved_special_token_87|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128093": {
748
+ "content": "<|reserved_special_token_88|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128094": {
756
+ "content": "<|reserved_special_token_89|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128095": {
764
+ "content": "<|reserved_special_token_90|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128096": {
772
+ "content": "<|reserved_special_token_91|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128097": {
780
+ "content": "<|reserved_special_token_92|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128098": {
788
+ "content": "<|reserved_special_token_93|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128099": {
796
+ "content": "<|reserved_special_token_94|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128100": {
804
+ "content": "<|reserved_special_token_95|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128101": {
812
+ "content": "<|reserved_special_token_96|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128102": {
820
+ "content": "<|reserved_special_token_97|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128103": {
828
+ "content": "<|reserved_special_token_98|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128104": {
836
+ "content": "<|reserved_special_token_99|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128105": {
844
+ "content": "<|reserved_special_token_100|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128106": {
852
+ "content": "<|reserved_special_token_101|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128107": {
860
+ "content": "<|reserved_special_token_102|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128108": {
868
+ "content": "<|reserved_special_token_103|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128109": {
876
+ "content": "<|reserved_special_token_104|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128110": {
884
+ "content": "<|reserved_special_token_105|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128111": {
892
+ "content": "<|reserved_special_token_106|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128112": {
900
+ "content": "<|reserved_special_token_107|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128113": {
908
+ "content": "<|reserved_special_token_108|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128114": {
916
+ "content": "<|reserved_special_token_109|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128115": {
924
+ "content": "<|reserved_special_token_110|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128116": {
932
+ "content": "<|reserved_special_token_111|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128117": {
940
+ "content": "<|reserved_special_token_112|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128118": {
948
+ "content": "<|reserved_special_token_113|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128119": {
956
+ "content": "<|reserved_special_token_114|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128120": {
964
+ "content": "<|reserved_special_token_115|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128121": {
972
+ "content": "<|reserved_special_token_116|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128122": {
980
+ "content": "<|reserved_special_token_117|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128123": {
988
+ "content": "<|reserved_special_token_118|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128124": {
996
+ "content": "<|reserved_special_token_119|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128125": {
1004
+ "content": "<|reserved_special_token_120|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128126": {
1012
+ "content": "<|reserved_special_token_121|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128127": {
1020
+ "content": "<|reserved_special_token_122|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128128": {
1028
+ "content": "<|reserved_special_token_123|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128129": {
1036
+ "content": "<|reserved_special_token_124|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128130": {
1044
+ "content": "<|reserved_special_token_125|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128131": {
1052
+ "content": "<|reserved_special_token_126|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128132": {
1060
+ "content": "<|reserved_special_token_127|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128133": {
1068
+ "content": "<|reserved_special_token_128|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128134": {
1076
+ "content": "<|reserved_special_token_129|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128135": {
1084
+ "content": "<|reserved_special_token_130|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128136": {
1092
+ "content": "<|reserved_special_token_131|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128137": {
1100
+ "content": "<|reserved_special_token_132|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128138": {
1108
+ "content": "<|reserved_special_token_133|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128139": {
1116
+ "content": "<|reserved_special_token_134|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128140": {
1124
+ "content": "<|reserved_special_token_135|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128141": {
1132
+ "content": "<|reserved_special_token_136|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128142": {
1140
+ "content": "<|reserved_special_token_137|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128143": {
1148
+ "content": "<|reserved_special_token_138|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128144": {
1156
+ "content": "<|reserved_special_token_139|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128145": {
1164
+ "content": "<|reserved_special_token_140|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128146": {
1172
+ "content": "<|reserved_special_token_141|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128147": {
1180
+ "content": "<|reserved_special_token_142|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128148": {
1188
+ "content": "<|reserved_special_token_143|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128149": {
1196
+ "content": "<|reserved_special_token_144|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128150": {
1204
+ "content": "<|reserved_special_token_145|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128151": {
1212
+ "content": "<|reserved_special_token_146|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128152": {
1220
+ "content": "<|reserved_special_token_147|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128153": {
1228
+ "content": "<|reserved_special_token_148|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128154": {
1236
+ "content": "<|reserved_special_token_149|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128155": {
1244
+ "content": "<|reserved_special_token_150|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128156": {
1252
+ "content": "<|reserved_special_token_151|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128157": {
1260
+ "content": "<|reserved_special_token_152|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128158": {
1268
+ "content": "<|reserved_special_token_153|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128159": {
1276
+ "content": "<|reserved_special_token_154|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128160": {
1284
+ "content": "<|reserved_special_token_155|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128161": {
1292
+ "content": "<|reserved_special_token_156|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128162": {
1300
+ "content": "<|reserved_special_token_157|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128163": {
1308
+ "content": "<|reserved_special_token_158|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128164": {
1316
+ "content": "<|reserved_special_token_159|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128165": {
1324
+ "content": "<|reserved_special_token_160|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128166": {
1332
+ "content": "<|reserved_special_token_161|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128167": {
1340
+ "content": "<|reserved_special_token_162|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128168": {
1348
+ "content": "<|reserved_special_token_163|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128169": {
1356
+ "content": "<|reserved_special_token_164|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128170": {
1364
+ "content": "<|reserved_special_token_165|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128171": {
1372
+ "content": "<|reserved_special_token_166|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128172": {
1380
+ "content": "<|reserved_special_token_167|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128173": {
1388
+ "content": "<|reserved_special_token_168|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128174": {
1396
+ "content": "<|reserved_special_token_169|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128175": {
1404
+ "content": "<|reserved_special_token_170|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128176": {
1412
+ "content": "<|reserved_special_token_171|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128177": {
1420
+ "content": "<|reserved_special_token_172|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128178": {
1428
+ "content": "<|reserved_special_token_173|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128179": {
1436
+ "content": "<|reserved_special_token_174|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128180": {
1444
+ "content": "<|reserved_special_token_175|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128181": {
1452
+ "content": "<|reserved_special_token_176|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128182": {
1460
+ "content": "<|reserved_special_token_177|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128183": {
1468
+ "content": "<|reserved_special_token_178|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128184": {
1476
+ "content": "<|reserved_special_token_179|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128185": {
1484
+ "content": "<|reserved_special_token_180|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128186": {
1492
+ "content": "<|reserved_special_token_181|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128187": {
1500
+ "content": "<|reserved_special_token_182|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128188": {
1508
+ "content": "<|reserved_special_token_183|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128189": {
1516
+ "content": "<|reserved_special_token_184|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128190": {
1524
+ "content": "<|reserved_special_token_185|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128191": {
1532
+ "content": "<|reserved_special_token_186|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128192": {
1540
+ "content": "<|reserved_special_token_187|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128193": {
1548
+ "content": "<|reserved_special_token_188|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128194": {
1556
+ "content": "<|reserved_special_token_189|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128195": {
1564
+ "content": "<|reserved_special_token_190|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128196": {
1572
+ "content": "<|reserved_special_token_191|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128197": {
1580
+ "content": "<|reserved_special_token_192|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128198": {
1588
+ "content": "<|reserved_special_token_193|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128199": {
1596
+ "content": "<|reserved_special_token_194|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128200": {
1604
+ "content": "<|reserved_special_token_195|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128201": {
1612
+ "content": "<|reserved_special_token_196|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128202": {
1620
+ "content": "<|reserved_special_token_197|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128203": {
1628
+ "content": "<|reserved_special_token_198|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128204": {
1636
+ "content": "<|reserved_special_token_199|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128205": {
1644
+ "content": "<|reserved_special_token_200|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128206": {
1652
+ "content": "<|reserved_special_token_201|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128207": {
1660
+ "content": "<|reserved_special_token_202|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128208": {
1668
+ "content": "<|reserved_special_token_203|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128209": {
1676
+ "content": "<|reserved_special_token_204|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128210": {
1684
+ "content": "<|reserved_special_token_205|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128211": {
1692
+ "content": "<|reserved_special_token_206|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128212": {
1700
+ "content": "<|reserved_special_token_207|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128213": {
1708
+ "content": "<|reserved_special_token_208|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128214": {
1716
+ "content": "<|reserved_special_token_209|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128215": {
1724
+ "content": "<|reserved_special_token_210|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128216": {
1732
+ "content": "<|reserved_special_token_211|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128217": {
1740
+ "content": "<|reserved_special_token_212|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128218": {
1748
+ "content": "<|reserved_special_token_213|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128219": {
1756
+ "content": "<|reserved_special_token_214|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128220": {
1764
+ "content": "<|reserved_special_token_215|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128221": {
1772
+ "content": "<|reserved_special_token_216|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128222": {
1780
+ "content": "<|reserved_special_token_217|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128223": {
1788
+ "content": "<|reserved_special_token_218|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128224": {
1796
+ "content": "<|reserved_special_token_219|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128225": {
1804
+ "content": "<|reserved_special_token_220|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128226": {
1812
+ "content": "<|reserved_special_token_221|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128227": {
1820
+ "content": "<|reserved_special_token_222|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128228": {
1828
+ "content": "<|reserved_special_token_223|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128229": {
1836
+ "content": "<|reserved_special_token_224|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128230": {
1844
+ "content": "<|reserved_special_token_225|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128231": {
1852
+ "content": "<|reserved_special_token_226|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128232": {
1860
+ "content": "<|reserved_special_token_227|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128233": {
1868
+ "content": "<|reserved_special_token_228|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128234": {
1876
+ "content": "<|reserved_special_token_229|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128235": {
1884
+ "content": "<|reserved_special_token_230|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128236": {
1892
+ "content": "<|reserved_special_token_231|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128237": {
1900
+ "content": "<|reserved_special_token_232|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128238": {
1908
+ "content": "<|reserved_special_token_233|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128239": {
1916
+ "content": "<|reserved_special_token_234|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128240": {
1924
+ "content": "<|reserved_special_token_235|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128241": {
1932
+ "content": "<|reserved_special_token_236|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128242": {
1940
+ "content": "<|reserved_special_token_237|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128243": {
1948
+ "content": "<|reserved_special_token_238|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128244": {
1956
+ "content": "<|reserved_special_token_239|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128245": {
1964
+ "content": "<|reserved_special_token_240|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128246": {
1972
+ "content": "<|reserved_special_token_241|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128247": {
1980
+ "content": "<|reserved_special_token_242|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128248": {
1988
+ "content": "<|reserved_special_token_243|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128249": {
1996
+ "content": "<|reserved_special_token_244|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128250": {
2004
+ "content": "<|reserved_special_token_245|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128251": {
2012
+ "content": "<|reserved_special_token_246|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128252": {
2020
+ "content": "<|reserved_special_token_247|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128253": {
2028
+ "content": "<|reserved_special_token_248|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128254": {
2036
+ "content": "<|reserved_special_token_249|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128255": {
2044
+ "content": "<|reserved_special_token_250|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ }
2051
+ },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content + '\\n' }}{% endif %}{% endfor %}",
2054
+ "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
+ "model_input_names": [
2057
+ "input_ids",
2058
+ "attention_mask"
2059
+ ],
2060
+ "model_max_length": 8192,
2061
+ "pad_token": "<|end_of_text|>",
2062
+ "padding_side": "right",
2063
+ "split_special_tokens": false,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast"
2065
+ }